polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{
162 Expression, From, Function, FunctionBody, Identifier, Join, Null, OrderBy, OutputClause,
163 TableRef, With,
164};
165use crate::generator::{Generator, GeneratorConfig};
166use crate::parser::Parser;
167use crate::tokens::{Token, TokenType, Tokenizer, TokenizerConfig};
168use serde::{Deserialize, Serialize};
169use std::collections::HashMap;
170use std::sync::{Arc, LazyLock, RwLock};
171
172/// Enumeration of all supported SQL dialects.
173///
174/// Each variant corresponds to a specific SQL database engine or query language.
175/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
176/// and is used as the default when no dialect is specified.
177///
178/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
179/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
180#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
181#[serde(rename_all = "lowercase")]
182pub enum DialectType {
183 /// Standard SQL with no dialect-specific behavior (default).
184 Generic,
185 /// PostgreSQL -- advanced open-source relational database.
186 PostgreSQL,
187 /// MySQL -- widely-used open-source relational database (also accepts "mysql").
188 MySQL,
189 /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
190 BigQuery,
191 /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
192 Snowflake,
193 /// DuckDB -- in-process analytical database with modern SQL extensions.
194 DuckDB,
195 /// SQLite -- lightweight embedded relational database.
196 SQLite,
197 /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
198 Hive,
199 /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
200 Spark,
201 /// Trino -- distributed SQL query engine (formerly PrestoSQL).
202 Trino,
203 /// PrestoDB -- distributed SQL query engine for big data.
204 Presto,
205 /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
206 Redshift,
207 /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
208 TSQL,
209 /// Oracle Database -- commercial relational database with PL/SQL extensions.
210 Oracle,
211 /// ClickHouse -- column-oriented OLAP database for real-time analytics.
212 ClickHouse,
213 /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
214 Databricks,
215 /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
216 Athena,
217 /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
218 Teradata,
219 /// Apache Doris -- real-time analytical database (MySQL-compatible).
220 Doris,
221 /// StarRocks -- sub-second OLAP database (MySQL-compatible).
222 StarRocks,
223 /// Materialize -- streaming SQL database built on differential dataflow.
224 Materialize,
225 /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
226 RisingWave,
227 /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
228 SingleStore,
229 /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
230 CockroachDB,
231 /// TiDB -- distributed HTAP database with MySQL compatibility.
232 TiDB,
233 /// Apache Druid -- real-time analytics database.
234 Druid,
235 /// Apache Solr -- search platform with SQL interface.
236 Solr,
237 /// Tableau -- data visualization platform with its own SQL dialect.
238 Tableau,
239 /// Dune Analytics -- blockchain analytics SQL engine.
240 Dune,
241 /// Microsoft Fabric -- unified analytics platform (T-SQL based).
242 Fabric,
243 /// Apache Drill -- schema-free SQL query engine for big data.
244 Drill,
245 /// Dremio -- data lakehouse platform with Arrow-based query engine.
246 Dremio,
247 /// Exasol -- in-memory analytic database.
248 Exasol,
249 /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions.
250 DataFusion,
251}
252
253impl Default for DialectType {
254 fn default() -> Self {
255 DialectType::Generic
256 }
257}
258
259impl std::fmt::Display for DialectType {
260 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261 match self {
262 DialectType::Generic => write!(f, "generic"),
263 DialectType::PostgreSQL => write!(f, "postgresql"),
264 DialectType::MySQL => write!(f, "mysql"),
265 DialectType::BigQuery => write!(f, "bigquery"),
266 DialectType::Snowflake => write!(f, "snowflake"),
267 DialectType::DuckDB => write!(f, "duckdb"),
268 DialectType::SQLite => write!(f, "sqlite"),
269 DialectType::Hive => write!(f, "hive"),
270 DialectType::Spark => write!(f, "spark"),
271 DialectType::Trino => write!(f, "trino"),
272 DialectType::Presto => write!(f, "presto"),
273 DialectType::Redshift => write!(f, "redshift"),
274 DialectType::TSQL => write!(f, "tsql"),
275 DialectType::Oracle => write!(f, "oracle"),
276 DialectType::ClickHouse => write!(f, "clickhouse"),
277 DialectType::Databricks => write!(f, "databricks"),
278 DialectType::Athena => write!(f, "athena"),
279 DialectType::Teradata => write!(f, "teradata"),
280 DialectType::Doris => write!(f, "doris"),
281 DialectType::StarRocks => write!(f, "starrocks"),
282 DialectType::Materialize => write!(f, "materialize"),
283 DialectType::RisingWave => write!(f, "risingwave"),
284 DialectType::SingleStore => write!(f, "singlestore"),
285 DialectType::CockroachDB => write!(f, "cockroachdb"),
286 DialectType::TiDB => write!(f, "tidb"),
287 DialectType::Druid => write!(f, "druid"),
288 DialectType::Solr => write!(f, "solr"),
289 DialectType::Tableau => write!(f, "tableau"),
290 DialectType::Dune => write!(f, "dune"),
291 DialectType::Fabric => write!(f, "fabric"),
292 DialectType::Drill => write!(f, "drill"),
293 DialectType::Dremio => write!(f, "dremio"),
294 DialectType::Exasol => write!(f, "exasol"),
295 DialectType::DataFusion => write!(f, "datafusion"),
296 }
297 }
298}
299
300impl std::str::FromStr for DialectType {
301 type Err = crate::error::Error;
302
303 fn from_str(s: &str) -> Result<Self> {
304 match s.to_ascii_lowercase().as_str() {
305 "generic" | "" => Ok(DialectType::Generic),
306 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
307 "mysql" => Ok(DialectType::MySQL),
308 "bigquery" => Ok(DialectType::BigQuery),
309 "snowflake" => Ok(DialectType::Snowflake),
310 "duckdb" => Ok(DialectType::DuckDB),
311 "sqlite" => Ok(DialectType::SQLite),
312 "hive" => Ok(DialectType::Hive),
313 "spark" | "spark2" => Ok(DialectType::Spark),
314 "trino" => Ok(DialectType::Trino),
315 "presto" => Ok(DialectType::Presto),
316 "redshift" => Ok(DialectType::Redshift),
317 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
318 "oracle" => Ok(DialectType::Oracle),
319 "clickhouse" => Ok(DialectType::ClickHouse),
320 "databricks" => Ok(DialectType::Databricks),
321 "athena" => Ok(DialectType::Athena),
322 "teradata" => Ok(DialectType::Teradata),
323 "doris" => Ok(DialectType::Doris),
324 "starrocks" => Ok(DialectType::StarRocks),
325 "materialize" => Ok(DialectType::Materialize),
326 "risingwave" => Ok(DialectType::RisingWave),
327 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
328 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
329 "tidb" => Ok(DialectType::TiDB),
330 "druid" => Ok(DialectType::Druid),
331 "solr" => Ok(DialectType::Solr),
332 "tableau" => Ok(DialectType::Tableau),
333 "dune" => Ok(DialectType::Dune),
334 "fabric" => Ok(DialectType::Fabric),
335 "drill" => Ok(DialectType::Drill),
336 "dremio" => Ok(DialectType::Dremio),
337 "exasol" => Ok(DialectType::Exasol),
338 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
339 _ => Err(crate::error::Error::parse(
340 format!("Unknown dialect: {}", s),
341 0,
342 0,
343 0,
344 0,
345 )),
346 }
347 }
348}
349
350/// Trait that each concrete SQL dialect must implement.
351///
352/// `DialectImpl` provides the configuration hooks and per-expression transform logic
353/// that distinguish one dialect from another. Implementors supply:
354///
355/// - A [`DialectType`] identifier.
356/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
357/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
358/// that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
359/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
360/// rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
361///
362/// The default implementations are no-ops, so a minimal dialect only needs to provide
363/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
364/// standard SQL.
365pub trait DialectImpl {
366 /// Returns the [`DialectType`] that identifies this dialect.
367 fn dialect_type(&self) -> DialectType;
368
369 /// Returns the tokenizer configuration for this dialect.
370 ///
371 /// Override to customize identifier quoting characters, string escape rules,
372 /// comment styles, and other lexing behavior.
373 fn tokenizer_config(&self) -> TokenizerConfig {
374 TokenizerConfig::default()
375 }
376
377 /// Returns the generator configuration for this dialect.
378 ///
379 /// Override to customize identifier quoting style, function name casing,
380 /// keyword casing, and other SQL generation behavior.
381 fn generator_config(&self) -> GeneratorConfig {
382 GeneratorConfig::default()
383 }
384
385 /// Returns a generator configuration tailored to a specific expression.
386 ///
387 /// Override this for hybrid dialects like Athena that route to different SQL engines
388 /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
389 /// The default delegates to [`generator_config`](DialectImpl::generator_config).
390 fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
391 self.generator_config()
392 }
393
394 /// Transforms a single expression node for this dialect, without recursing into children.
395 ///
396 /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
397 /// expression unchanged if no dialect-specific rewrite is needed. Transformations
398 /// typically include function renaming, operator substitution, and type mapping.
399 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
400 Ok(expr)
401 }
402
403 /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
404 ///
405 /// Override this to apply structural rewrites that must see the entire tree at once,
406 /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
407 /// `explode_projection_to_unnest`. The default is a no-op pass-through.
408 fn preprocess(&self, expr: Expression) -> Result<Expression> {
409 Ok(expr)
410 }
411}
412
413/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
414/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
415///
416/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
417/// and then nested element/field types are recursed into. This ensures that dialect-level
418/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
419fn transform_data_type_recursive<F>(
420 dt: crate::expressions::DataType,
421 transform_fn: &F,
422) -> Result<crate::expressions::DataType>
423where
424 F: Fn(Expression) -> Result<Expression>,
425{
426 use crate::expressions::DataType;
427 // First, transform the outermost type through the expression system
428 let dt_expr = transform_fn(Expression::DataType(dt))?;
429 let dt = match dt_expr {
430 Expression::DataType(d) => d,
431 _ => {
432 return Ok(match dt_expr {
433 _ => DataType::Custom {
434 name: "UNKNOWN".to_string(),
435 },
436 })
437 }
438 };
439 // Then recurse into nested types
440 match dt {
441 DataType::Array {
442 element_type,
443 dimension,
444 } => {
445 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
446 Ok(DataType::Array {
447 element_type: Box::new(inner),
448 dimension,
449 })
450 }
451 DataType::List { element_type } => {
452 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
453 Ok(DataType::List {
454 element_type: Box::new(inner),
455 })
456 }
457 DataType::Struct { fields, nested } => {
458 let mut new_fields = Vec::new();
459 for mut field in fields {
460 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
461 new_fields.push(field);
462 }
463 Ok(DataType::Struct {
464 fields: new_fields,
465 nested,
466 })
467 }
468 DataType::Map {
469 key_type,
470 value_type,
471 } => {
472 let k = transform_data_type_recursive(*key_type, transform_fn)?;
473 let v = transform_data_type_recursive(*value_type, transform_fn)?;
474 Ok(DataType::Map {
475 key_type: Box::new(k),
476 value_type: Box::new(v),
477 })
478 }
479 other => Ok(other),
480 }
481}
482
/// Rewrites a DuckDB `%`-style date/time format string into its Presto counterpart.
///
/// Both engines use C-style `%` directives, but a handful of specifiers differ. The
/// conversion is a fixed sequence of textual substitutions applied in order:
///
/// 1. multi-character DuckDB patterns are swapped for `\x01`-delimited placeholders so
///    the single-specifier pass cannot touch them;
/// 2. `%M` becomes `%i` and `%S` becomes `%s`;
/// 3. the placeholders are replaced by the Presto spellings (`%-m` to `%c`, `%-d` to
///    `%e`, `%-I` to `%l`, `%-H` to `%k`, `%H:%M:%S` to `%T`; `%Y-%m-%d` is restored
///    unchanged).
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // (pattern, replacement) pairs; the order is significant and must not be changed.
    const PASSES: &[(&str, &str)] = &[
        // Shield the multi-character patterns.
        ("%-m", "\x01NOPADM\x01"),
        ("%-d", "\x01NOPADD\x01"),
        ("%-I", "\x01NOPADI\x01"),
        ("%-H", "\x01NOPADH\x01"),
        ("%H:%M:%S", "\x01HMS\x01"),
        ("%Y-%m-%d", "\x01YMD\x01"),
        // Convert the remaining individual specifiers.
        ("%M", "%i"),
        ("%S", "%s"),
        // Swap the placeholders for the Presto equivalents.
        ("\x01NOPADM\x01", "%c"),
        ("\x01NOPADD\x01", "%e"),
        ("\x01NOPADI\x01", "%l"),
        ("\x01NOPADH\x01", "%k"),
        ("\x01HMS\x01", "%T"),
        ("\x01YMD\x01", "%Y-%m-%d"),
    ];

    PASSES
        .iter()
        .fold(fmt.to_string(), |text, &(pattern, replacement)| {
            text.replace(pattern, replacement)
        })
}
508
/// Rewrites a DuckDB `%`-style date/time format string into BigQuery's notation.
///
/// BigQuery uses strftime-like directives, so only a few substitutions are made, applied
/// in order with the longest pattern first: `%-d` becomes `%e`, a full
/// `%Y-%m-%d %H:%M:%S` becomes `%F %T`, and a standalone `%Y-%m-%d` or `%H:%M:%S`
/// becomes `%F` or `%T` respectively. Everything else is copied through unchanged.
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // (pattern, replacement) pairs; longer patterns come before their sub-patterns.
    const PASSES: &[(&str, &str)] = &[
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ];

    PASSES
        .iter()
        .fold(fmt.to_string(), |text, &(pattern, replacement)| {
            text.replace(pattern, replacement)
        })
}
521
522#[derive(Debug)]
523enum TransformTask {
524 Visit(Expression),
525 Finish(FinishTask),
526}
527
528#[derive(Debug)]
529enum FinishTask {
530 Unary(Expression),
531 Binary(Expression),
532 CastLike(Expression),
533 List(Expression, usize),
534 From(crate::expressions::From, usize),
535 Select(SelectFrame),
536 SetOp(Expression),
537}
538
539#[derive(Debug)]
540struct SelectFrame {
541 select: Box<crate::expressions::Select>,
542 expr_count: usize,
543 from_present: bool,
544 where_present: bool,
545 group_by_count: usize,
546 having_present: bool,
547 qualify_present: bool,
548}
549
550fn transform_pop_result(results: &mut Vec<Expression>) -> Result<Expression> {
551 results
552 .pop()
553 .ok_or_else(|| crate::error::Error::Internal("transform stack underflow".to_string()))
554}
555
556fn transform_pop_results(results: &mut Vec<Expression>, count: usize) -> Result<Vec<Expression>> {
557 if results.len() < count {
558 return Err(crate::error::Error::Internal(
559 "transform result stack underflow".to_string(),
560 ));
561 }
562 Ok(results.split_off(results.len() - count))
563}
564
565/// Applies a transform function bottom-up through an entire expression tree.
566///
567/// The public entrypoint uses an explicit task stack for the recursion-heavy shapes
568/// that dominate deeply nested SQL (nested SELECT/FROM/SUBQUERY chains, set-operation
569/// trees, and common binary/unary expression chains). Less common shapes currently
570/// reuse the reference recursive implementation so semantics stay identical while
571/// the hot path avoids stack growth.
572pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
573where
574 F: Fn(Expression) -> Result<Expression>,
575{
576 #[cfg(feature = "stacker")]
577 {
578 let red_zone = if cfg!(debug_assertions) {
579 4 * 1024 * 1024
580 } else {
581 1024 * 1024
582 };
583 stacker::maybe_grow(red_zone, 8 * 1024 * 1024, move || {
584 transform_recursive_inner(expr, transform_fn)
585 })
586 }
587 #[cfg(not(feature = "stacker"))]
588 {
589 transform_recursive_inner(expr, transform_fn)
590 }
591}
592
593fn transform_recursive_inner<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
594where
595 F: Fn(Expression) -> Result<Expression>,
596{
597 let mut tasks = vec![TransformTask::Visit(expr)];
598 let mut results = Vec::new();
599
600 while let Some(task) = tasks.pop() {
601 match task {
602 TransformTask::Visit(expr) => {
603 if matches!(
604 &expr,
605 Expression::Literal(_)
606 | Expression::Boolean(_)
607 | Expression::Null(_)
608 | Expression::Identifier(_)
609 | Expression::Star(_)
610 | Expression::Parameter(_)
611 | Expression::Placeholder(_)
612 | Expression::SessionParameter(_)
613 ) {
614 results.push(transform_fn(expr)?);
615 continue;
616 }
617
618 match expr {
619 Expression::Alias(mut alias) => {
620 let child = std::mem::replace(&mut alias.this, Expression::Null(Null));
621 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Alias(
622 alias,
623 ))));
624 tasks.push(TransformTask::Visit(child));
625 }
626 Expression::Paren(mut paren) => {
627 let child = std::mem::replace(&mut paren.this, Expression::Null(Null));
628 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Paren(
629 paren,
630 ))));
631 tasks.push(TransformTask::Visit(child));
632 }
633 Expression::Not(mut not) => {
634 let child = std::mem::replace(&mut not.this, Expression::Null(Null));
635 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Not(
636 not,
637 ))));
638 tasks.push(TransformTask::Visit(child));
639 }
640 Expression::Neg(mut neg) => {
641 let child = std::mem::replace(&mut neg.this, Expression::Null(Null));
642 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Neg(
643 neg,
644 ))));
645 tasks.push(TransformTask::Visit(child));
646 }
647 Expression::IsNull(mut expr) => {
648 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
649 tasks.push(TransformTask::Finish(FinishTask::Unary(
650 Expression::IsNull(expr),
651 )));
652 tasks.push(TransformTask::Visit(child));
653 }
654 Expression::IsTrue(mut expr) => {
655 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
656 tasks.push(TransformTask::Finish(FinishTask::Unary(
657 Expression::IsTrue(expr),
658 )));
659 tasks.push(TransformTask::Visit(child));
660 }
661 Expression::IsFalse(mut expr) => {
662 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
663 tasks.push(TransformTask::Finish(FinishTask::Unary(
664 Expression::IsFalse(expr),
665 )));
666 tasks.push(TransformTask::Visit(child));
667 }
668 Expression::Subquery(mut subquery) => {
669 let child = std::mem::replace(&mut subquery.this, Expression::Null(Null));
670 tasks.push(TransformTask::Finish(FinishTask::Unary(
671 Expression::Subquery(subquery),
672 )));
673 tasks.push(TransformTask::Visit(child));
674 }
675 Expression::Exists(mut exists) => {
676 let child = std::mem::replace(&mut exists.this, Expression::Null(Null));
677 tasks.push(TransformTask::Finish(FinishTask::Unary(
678 Expression::Exists(exists),
679 )));
680 tasks.push(TransformTask::Visit(child));
681 }
682 Expression::TableArgument(mut arg) => {
683 let child = std::mem::replace(&mut arg.this, Expression::Null(Null));
684 tasks.push(TransformTask::Finish(FinishTask::Unary(
685 Expression::TableArgument(arg),
686 )));
687 tasks.push(TransformTask::Visit(child));
688 }
689 Expression::And(mut op) => {
690 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
691 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
692 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::And(
693 op,
694 ))));
695 tasks.push(TransformTask::Visit(right));
696 tasks.push(TransformTask::Visit(left));
697 }
698 Expression::Or(mut op) => {
699 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
700 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
701 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Or(
702 op,
703 ))));
704 tasks.push(TransformTask::Visit(right));
705 tasks.push(TransformTask::Visit(left));
706 }
707 Expression::Add(mut op) => {
708 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
709 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
710 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Add(
711 op,
712 ))));
713 tasks.push(TransformTask::Visit(right));
714 tasks.push(TransformTask::Visit(left));
715 }
716 Expression::Sub(mut op) => {
717 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
718 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
719 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Sub(
720 op,
721 ))));
722 tasks.push(TransformTask::Visit(right));
723 tasks.push(TransformTask::Visit(left));
724 }
725 Expression::Mul(mut op) => {
726 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
727 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
728 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mul(
729 op,
730 ))));
731 tasks.push(TransformTask::Visit(right));
732 tasks.push(TransformTask::Visit(left));
733 }
734 Expression::Div(mut op) => {
735 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
736 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
737 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Div(
738 op,
739 ))));
740 tasks.push(TransformTask::Visit(right));
741 tasks.push(TransformTask::Visit(left));
742 }
743 Expression::Eq(mut op) => {
744 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
745 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
746 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Eq(
747 op,
748 ))));
749 tasks.push(TransformTask::Visit(right));
750 tasks.push(TransformTask::Visit(left));
751 }
752 Expression::Lt(mut op) => {
753 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
754 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
755 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lt(
756 op,
757 ))));
758 tasks.push(TransformTask::Visit(right));
759 tasks.push(TransformTask::Visit(left));
760 }
761 Expression::Gt(mut op) => {
762 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
763 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
764 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gt(
765 op,
766 ))));
767 tasks.push(TransformTask::Visit(right));
768 tasks.push(TransformTask::Visit(left));
769 }
770 Expression::Neq(mut op) => {
771 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
772 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
773 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Neq(
774 op,
775 ))));
776 tasks.push(TransformTask::Visit(right));
777 tasks.push(TransformTask::Visit(left));
778 }
779 Expression::Lte(mut op) => {
780 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
781 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
782 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lte(
783 op,
784 ))));
785 tasks.push(TransformTask::Visit(right));
786 tasks.push(TransformTask::Visit(left));
787 }
788 Expression::Gte(mut op) => {
789 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
790 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
791 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gte(
792 op,
793 ))));
794 tasks.push(TransformTask::Visit(right));
795 tasks.push(TransformTask::Visit(left));
796 }
797 Expression::Mod(mut op) => {
798 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
799 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
800 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mod(
801 op,
802 ))));
803 tasks.push(TransformTask::Visit(right));
804 tasks.push(TransformTask::Visit(left));
805 }
806 Expression::Concat(mut op) => {
807 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
808 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
809 tasks.push(TransformTask::Finish(FinishTask::Binary(
810 Expression::Concat(op),
811 )));
812 tasks.push(TransformTask::Visit(right));
813 tasks.push(TransformTask::Visit(left));
814 }
815 Expression::BitwiseAnd(mut op) => {
816 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
817 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
818 tasks.push(TransformTask::Finish(FinishTask::Binary(
819 Expression::BitwiseAnd(op),
820 )));
821 tasks.push(TransformTask::Visit(right));
822 tasks.push(TransformTask::Visit(left));
823 }
824 Expression::BitwiseOr(mut op) => {
825 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
826 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
827 tasks.push(TransformTask::Finish(FinishTask::Binary(
828 Expression::BitwiseOr(op),
829 )));
830 tasks.push(TransformTask::Visit(right));
831 tasks.push(TransformTask::Visit(left));
832 }
833 Expression::BitwiseXor(mut op) => {
834 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
835 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
836 tasks.push(TransformTask::Finish(FinishTask::Binary(
837 Expression::BitwiseXor(op),
838 )));
839 tasks.push(TransformTask::Visit(right));
840 tasks.push(TransformTask::Visit(left));
841 }
842 Expression::Is(mut op) => {
843 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
844 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
845 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Is(
846 op,
847 ))));
848 tasks.push(TransformTask::Visit(right));
849 tasks.push(TransformTask::Visit(left));
850 }
851 Expression::MemberOf(mut op) => {
852 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
853 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
854 tasks.push(TransformTask::Finish(FinishTask::Binary(
855 Expression::MemberOf(op),
856 )));
857 tasks.push(TransformTask::Visit(right));
858 tasks.push(TransformTask::Visit(left));
859 }
860 Expression::ArrayContainsAll(mut op) => {
861 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
862 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
863 tasks.push(TransformTask::Finish(FinishTask::Binary(
864 Expression::ArrayContainsAll(op),
865 )));
866 tasks.push(TransformTask::Visit(right));
867 tasks.push(TransformTask::Visit(left));
868 }
869 Expression::ArrayContainedBy(mut op) => {
870 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
871 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
872 tasks.push(TransformTask::Finish(FinishTask::Binary(
873 Expression::ArrayContainedBy(op),
874 )));
875 tasks.push(TransformTask::Visit(right));
876 tasks.push(TransformTask::Visit(left));
877 }
878 Expression::ArrayOverlaps(mut op) => {
879 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
880 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
881 tasks.push(TransformTask::Finish(FinishTask::Binary(
882 Expression::ArrayOverlaps(op),
883 )));
884 tasks.push(TransformTask::Visit(right));
885 tasks.push(TransformTask::Visit(left));
886 }
887 Expression::TsMatch(mut op) => {
888 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
889 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
890 tasks.push(TransformTask::Finish(FinishTask::Binary(
891 Expression::TsMatch(op),
892 )));
893 tasks.push(TransformTask::Visit(right));
894 tasks.push(TransformTask::Visit(left));
895 }
896 Expression::Adjacent(mut op) => {
897 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
898 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
899 tasks.push(TransformTask::Finish(FinishTask::Binary(
900 Expression::Adjacent(op),
901 )));
902 tasks.push(TransformTask::Visit(right));
903 tasks.push(TransformTask::Visit(left));
904 }
905 Expression::Like(mut like) => {
906 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
907 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
908 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Like(
909 like,
910 ))));
911 tasks.push(TransformTask::Visit(right));
912 tasks.push(TransformTask::Visit(left));
913 }
914 Expression::ILike(mut like) => {
915 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
916 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
917 tasks.push(TransformTask::Finish(FinishTask::Binary(
918 Expression::ILike(like),
919 )));
920 tasks.push(TransformTask::Visit(right));
921 tasks.push(TransformTask::Visit(left));
922 }
923 Expression::Cast(mut cast) => {
924 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
925 tasks.push(TransformTask::Finish(FinishTask::CastLike(
926 Expression::Cast(cast),
927 )));
928 tasks.push(TransformTask::Visit(child));
929 }
930 Expression::TryCast(mut cast) => {
931 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
932 tasks.push(TransformTask::Finish(FinishTask::CastLike(
933 Expression::TryCast(cast),
934 )));
935 tasks.push(TransformTask::Visit(child));
936 }
937 Expression::SafeCast(mut cast) => {
938 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
939 tasks.push(TransformTask::Finish(FinishTask::CastLike(
940 Expression::SafeCast(cast),
941 )));
942 tasks.push(TransformTask::Visit(child));
943 }
944 Expression::Function(mut function) => {
945 let args = std::mem::take(&mut function.args);
946 let count = args.len();
947 tasks.push(TransformTask::Finish(FinishTask::List(
948 Expression::Function(function),
949 count,
950 )));
951 for child in args.into_iter().rev() {
952 tasks.push(TransformTask::Visit(child));
953 }
954 }
955 Expression::Array(mut array) => {
956 let expressions = std::mem::take(&mut array.expressions);
957 let count = expressions.len();
958 tasks.push(TransformTask::Finish(FinishTask::List(
959 Expression::Array(array),
960 count,
961 )));
962 for child in expressions.into_iter().rev() {
963 tasks.push(TransformTask::Visit(child));
964 }
965 }
966 Expression::Tuple(mut tuple) => {
967 let expressions = std::mem::take(&mut tuple.expressions);
968 let count = expressions.len();
969 tasks.push(TransformTask::Finish(FinishTask::List(
970 Expression::Tuple(tuple),
971 count,
972 )));
973 for child in expressions.into_iter().rev() {
974 tasks.push(TransformTask::Visit(child));
975 }
976 }
977 Expression::ArrayFunc(mut array) => {
978 let expressions = std::mem::take(&mut array.expressions);
979 let count = expressions.len();
980 tasks.push(TransformTask::Finish(FinishTask::List(
981 Expression::ArrayFunc(array),
982 count,
983 )));
984 for child in expressions.into_iter().rev() {
985 tasks.push(TransformTask::Visit(child));
986 }
987 }
988 Expression::Coalesce(mut func) => {
989 let expressions = std::mem::take(&mut func.expressions);
990 let count = expressions.len();
991 tasks.push(TransformTask::Finish(FinishTask::List(
992 Expression::Coalesce(func),
993 count,
994 )));
995 for child in expressions.into_iter().rev() {
996 tasks.push(TransformTask::Visit(child));
997 }
998 }
999 Expression::Greatest(mut func) => {
1000 let expressions = std::mem::take(&mut func.expressions);
1001 let count = expressions.len();
1002 tasks.push(TransformTask::Finish(FinishTask::List(
1003 Expression::Greatest(func),
1004 count,
1005 )));
1006 for child in expressions.into_iter().rev() {
1007 tasks.push(TransformTask::Visit(child));
1008 }
1009 }
1010 Expression::Least(mut func) => {
1011 let expressions = std::mem::take(&mut func.expressions);
1012 let count = expressions.len();
1013 tasks.push(TransformTask::Finish(FinishTask::List(
1014 Expression::Least(func),
1015 count,
1016 )));
1017 for child in expressions.into_iter().rev() {
1018 tasks.push(TransformTask::Visit(child));
1019 }
1020 }
1021 Expression::ArrayConcat(mut func) => {
1022 let expressions = std::mem::take(&mut func.expressions);
1023 let count = expressions.len();
1024 tasks.push(TransformTask::Finish(FinishTask::List(
1025 Expression::ArrayConcat(func),
1026 count,
1027 )));
1028 for child in expressions.into_iter().rev() {
1029 tasks.push(TransformTask::Visit(child));
1030 }
1031 }
1032 Expression::ArrayIntersect(mut func) => {
1033 let expressions = std::mem::take(&mut func.expressions);
1034 let count = expressions.len();
1035 tasks.push(TransformTask::Finish(FinishTask::List(
1036 Expression::ArrayIntersect(func),
1037 count,
1038 )));
1039 for child in expressions.into_iter().rev() {
1040 tasks.push(TransformTask::Visit(child));
1041 }
1042 }
1043 Expression::ArrayZip(mut func) => {
1044 let expressions = std::mem::take(&mut func.expressions);
1045 let count = expressions.len();
1046 tasks.push(TransformTask::Finish(FinishTask::List(
1047 Expression::ArrayZip(func),
1048 count,
1049 )));
1050 for child in expressions.into_iter().rev() {
1051 tasks.push(TransformTask::Visit(child));
1052 }
1053 }
1054 Expression::MapConcat(mut func) => {
1055 let expressions = std::mem::take(&mut func.expressions);
1056 let count = expressions.len();
1057 tasks.push(TransformTask::Finish(FinishTask::List(
1058 Expression::MapConcat(func),
1059 count,
1060 )));
1061 for child in expressions.into_iter().rev() {
1062 tasks.push(TransformTask::Visit(child));
1063 }
1064 }
1065 Expression::JsonArray(mut func) => {
1066 let expressions = std::mem::take(&mut func.expressions);
1067 let count = expressions.len();
1068 tasks.push(TransformTask::Finish(FinishTask::List(
1069 Expression::JsonArray(func),
1070 count,
1071 )));
1072 for child in expressions.into_iter().rev() {
1073 tasks.push(TransformTask::Visit(child));
1074 }
1075 }
1076 Expression::From(mut from) => {
1077 let expressions = std::mem::take(&mut from.expressions);
1078 let count = expressions.len();
1079 tasks.push(TransformTask::Finish(FinishTask::From(*from, count)));
1080 for child in expressions.into_iter().rev() {
1081 tasks.push(TransformTask::Visit(child));
1082 }
1083 }
1084 Expression::Select(mut select) => {
1085 let expressions = std::mem::take(&mut select.expressions);
1086 let expr_count = expressions.len();
1087
1088 let from_info = select.from.take().map(|mut from| {
1089 let children = std::mem::take(&mut from.expressions);
1090 (from, children)
1091 });
1092 let from_present = from_info.is_some();
1093
1094 let where_child = select.where_clause.as_mut().map(|where_clause| {
1095 std::mem::replace(&mut where_clause.this, Expression::Null(Null))
1096 });
1097 let where_present = where_child.is_some();
1098
1099 let group_expressions = select
1100 .group_by
1101 .as_mut()
1102 .map(|group_by| std::mem::take(&mut group_by.expressions))
1103 .unwrap_or_default();
1104 let group_by_count = group_expressions.len();
1105
1106 let having_child = select.having.as_mut().map(|having| {
1107 std::mem::replace(&mut having.this, Expression::Null(Null))
1108 });
1109 let having_present = having_child.is_some();
1110
1111 let qualify_child = select.qualify.as_mut().map(|qualify| {
1112 std::mem::replace(&mut qualify.this, Expression::Null(Null))
1113 });
1114 let qualify_present = qualify_child.is_some();
1115
1116 tasks.push(TransformTask::Finish(FinishTask::Select(SelectFrame {
1117 select,
1118 expr_count,
1119 from_present,
1120 where_present,
1121 group_by_count,
1122 having_present,
1123 qualify_present,
1124 })));
1125
1126 if let Some(child) = qualify_child {
1127 tasks.push(TransformTask::Visit(child));
1128 }
1129 if let Some(child) = having_child {
1130 tasks.push(TransformTask::Visit(child));
1131 }
1132 for child in group_expressions.into_iter().rev() {
1133 tasks.push(TransformTask::Visit(child));
1134 }
1135 if let Some(child) = where_child {
1136 tasks.push(TransformTask::Visit(child));
1137 }
1138 if let Some((from, children)) = from_info {
1139 tasks.push(TransformTask::Finish(FinishTask::From(
1140 from,
1141 children.len(),
1142 )));
1143 for child in children.into_iter().rev() {
1144 tasks.push(TransformTask::Visit(child));
1145 }
1146 }
1147 for child in expressions.into_iter().rev() {
1148 tasks.push(TransformTask::Visit(child));
1149 }
1150 }
1151 Expression::Union(mut union) => {
1152 let right = std::mem::replace(&mut union.right, Expression::Null(Null));
1153 let left = std::mem::replace(&mut union.left, Expression::Null(Null));
1154 tasks.push(TransformTask::Finish(FinishTask::SetOp(Expression::Union(
1155 union,
1156 ))));
1157 tasks.push(TransformTask::Visit(right));
1158 tasks.push(TransformTask::Visit(left));
1159 }
1160 Expression::Intersect(mut intersect) => {
1161 let right = std::mem::replace(&mut intersect.right, Expression::Null(Null));
1162 let left = std::mem::replace(&mut intersect.left, Expression::Null(Null));
1163 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1164 Expression::Intersect(intersect),
1165 )));
1166 tasks.push(TransformTask::Visit(right));
1167 tasks.push(TransformTask::Visit(left));
1168 }
1169 Expression::Except(mut except) => {
1170 let right = std::mem::replace(&mut except.right, Expression::Null(Null));
1171 let left = std::mem::replace(&mut except.left, Expression::Null(Null));
1172 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1173 Expression::Except(except),
1174 )));
1175 tasks.push(TransformTask::Visit(right));
1176 tasks.push(TransformTask::Visit(left));
1177 }
1178 other => {
1179 results.push(transform_recursive_reference(other, transform_fn)?);
1180 }
1181 }
1182 }
1183 TransformTask::Finish(finish) => match finish {
1184 FinishTask::Unary(expr) => {
1185 let child = transform_pop_result(&mut results)?;
1186 let rebuilt = match expr {
1187 Expression::Alias(mut alias) => {
1188 alias.this = child;
1189 Expression::Alias(alias)
1190 }
1191 Expression::Paren(mut paren) => {
1192 paren.this = child;
1193 Expression::Paren(paren)
1194 }
1195 Expression::Not(mut not) => {
1196 not.this = child;
1197 Expression::Not(not)
1198 }
1199 Expression::Neg(mut neg) => {
1200 neg.this = child;
1201 Expression::Neg(neg)
1202 }
1203 Expression::IsNull(mut expr) => {
1204 expr.this = child;
1205 Expression::IsNull(expr)
1206 }
1207 Expression::IsTrue(mut expr) => {
1208 expr.this = child;
1209 Expression::IsTrue(expr)
1210 }
1211 Expression::IsFalse(mut expr) => {
1212 expr.this = child;
1213 Expression::IsFalse(expr)
1214 }
1215 Expression::Subquery(mut subquery) => {
1216 subquery.this = child;
1217 Expression::Subquery(subquery)
1218 }
1219 Expression::Exists(mut exists) => {
1220 exists.this = child;
1221 Expression::Exists(exists)
1222 }
1223 Expression::TableArgument(mut arg) => {
1224 arg.this = child;
1225 Expression::TableArgument(arg)
1226 }
1227 _ => {
1228 return Err(crate::error::Error::Internal(
1229 "unexpected unary transform task".to_string(),
1230 ));
1231 }
1232 };
1233 results.push(transform_fn(rebuilt)?);
1234 }
1235 FinishTask::Binary(expr) => {
1236 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1237 let left = children.next().expect("left child");
1238 let right = children.next().expect("right child");
1239 let rebuilt = match expr {
1240 Expression::And(mut op) => {
1241 op.left = left;
1242 op.right = right;
1243 Expression::And(op)
1244 }
1245 Expression::Or(mut op) => {
1246 op.left = left;
1247 op.right = right;
1248 Expression::Or(op)
1249 }
1250 Expression::Add(mut op) => {
1251 op.left = left;
1252 op.right = right;
1253 Expression::Add(op)
1254 }
1255 Expression::Sub(mut op) => {
1256 op.left = left;
1257 op.right = right;
1258 Expression::Sub(op)
1259 }
1260 Expression::Mul(mut op) => {
1261 op.left = left;
1262 op.right = right;
1263 Expression::Mul(op)
1264 }
1265 Expression::Div(mut op) => {
1266 op.left = left;
1267 op.right = right;
1268 Expression::Div(op)
1269 }
1270 Expression::Eq(mut op) => {
1271 op.left = left;
1272 op.right = right;
1273 Expression::Eq(op)
1274 }
1275 Expression::Lt(mut op) => {
1276 op.left = left;
1277 op.right = right;
1278 Expression::Lt(op)
1279 }
1280 Expression::Gt(mut op) => {
1281 op.left = left;
1282 op.right = right;
1283 Expression::Gt(op)
1284 }
1285 Expression::Neq(mut op) => {
1286 op.left = left;
1287 op.right = right;
1288 Expression::Neq(op)
1289 }
1290 Expression::Lte(mut op) => {
1291 op.left = left;
1292 op.right = right;
1293 Expression::Lte(op)
1294 }
1295 Expression::Gte(mut op) => {
1296 op.left = left;
1297 op.right = right;
1298 Expression::Gte(op)
1299 }
1300 Expression::Mod(mut op) => {
1301 op.left = left;
1302 op.right = right;
1303 Expression::Mod(op)
1304 }
1305 Expression::Concat(mut op) => {
1306 op.left = left;
1307 op.right = right;
1308 Expression::Concat(op)
1309 }
1310 Expression::BitwiseAnd(mut op) => {
1311 op.left = left;
1312 op.right = right;
1313 Expression::BitwiseAnd(op)
1314 }
1315 Expression::BitwiseOr(mut op) => {
1316 op.left = left;
1317 op.right = right;
1318 Expression::BitwiseOr(op)
1319 }
1320 Expression::BitwiseXor(mut op) => {
1321 op.left = left;
1322 op.right = right;
1323 Expression::BitwiseXor(op)
1324 }
1325 Expression::Is(mut op) => {
1326 op.left = left;
1327 op.right = right;
1328 Expression::Is(op)
1329 }
1330 Expression::MemberOf(mut op) => {
1331 op.left = left;
1332 op.right = right;
1333 Expression::MemberOf(op)
1334 }
1335 Expression::ArrayContainsAll(mut op) => {
1336 op.left = left;
1337 op.right = right;
1338 Expression::ArrayContainsAll(op)
1339 }
1340 Expression::ArrayContainedBy(mut op) => {
1341 op.left = left;
1342 op.right = right;
1343 Expression::ArrayContainedBy(op)
1344 }
1345 Expression::ArrayOverlaps(mut op) => {
1346 op.left = left;
1347 op.right = right;
1348 Expression::ArrayOverlaps(op)
1349 }
1350 Expression::TsMatch(mut op) => {
1351 op.left = left;
1352 op.right = right;
1353 Expression::TsMatch(op)
1354 }
1355 Expression::Adjacent(mut op) => {
1356 op.left = left;
1357 op.right = right;
1358 Expression::Adjacent(op)
1359 }
1360 Expression::Like(mut like) => {
1361 like.left = left;
1362 like.right = right;
1363 Expression::Like(like)
1364 }
1365 Expression::ILike(mut like) => {
1366 like.left = left;
1367 like.right = right;
1368 Expression::ILike(like)
1369 }
1370 _ => {
1371 return Err(crate::error::Error::Internal(
1372 "unexpected binary transform task".to_string(),
1373 ));
1374 }
1375 };
1376 results.push(transform_fn(rebuilt)?);
1377 }
1378 FinishTask::CastLike(expr) => {
1379 let child = transform_pop_result(&mut results)?;
1380 let rebuilt = match expr {
1381 Expression::Cast(mut cast) => {
1382 cast.this = child;
1383 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1384 Expression::Cast(cast)
1385 }
1386 Expression::TryCast(mut cast) => {
1387 cast.this = child;
1388 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1389 Expression::TryCast(cast)
1390 }
1391 Expression::SafeCast(mut cast) => {
1392 cast.this = child;
1393 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1394 Expression::SafeCast(cast)
1395 }
1396 _ => {
1397 return Err(crate::error::Error::Internal(
1398 "unexpected cast transform task".to_string(),
1399 ));
1400 }
1401 };
1402 results.push(transform_fn(rebuilt)?);
1403 }
1404 FinishTask::List(expr, count) => {
1405 let children = transform_pop_results(&mut results, count)?;
1406 let rebuilt = match expr {
1407 Expression::Function(mut function) => {
1408 function.args = children;
1409 Expression::Function(function)
1410 }
1411 Expression::Array(mut array) => {
1412 array.expressions = children;
1413 Expression::Array(array)
1414 }
1415 Expression::Tuple(mut tuple) => {
1416 tuple.expressions = children;
1417 Expression::Tuple(tuple)
1418 }
1419 Expression::ArrayFunc(mut array) => {
1420 array.expressions = children;
1421 Expression::ArrayFunc(array)
1422 }
1423 Expression::Coalesce(mut func) => {
1424 func.expressions = children;
1425 Expression::Coalesce(func)
1426 }
1427 Expression::Greatest(mut func) => {
1428 func.expressions = children;
1429 Expression::Greatest(func)
1430 }
1431 Expression::Least(mut func) => {
1432 func.expressions = children;
1433 Expression::Least(func)
1434 }
1435 Expression::ArrayConcat(mut func) => {
1436 func.expressions = children;
1437 Expression::ArrayConcat(func)
1438 }
1439 Expression::ArrayIntersect(mut func) => {
1440 func.expressions = children;
1441 Expression::ArrayIntersect(func)
1442 }
1443 Expression::ArrayZip(mut func) => {
1444 func.expressions = children;
1445 Expression::ArrayZip(func)
1446 }
1447 Expression::MapConcat(mut func) => {
1448 func.expressions = children;
1449 Expression::MapConcat(func)
1450 }
1451 Expression::JsonArray(mut func) => {
1452 func.expressions = children;
1453 Expression::JsonArray(func)
1454 }
1455 _ => {
1456 return Err(crate::error::Error::Internal(
1457 "unexpected list transform task".to_string(),
1458 ));
1459 }
1460 };
1461 results.push(transform_fn(rebuilt)?);
1462 }
1463 FinishTask::From(mut from, count) => {
1464 from.expressions = transform_pop_results(&mut results, count)?;
1465 results.push(transform_fn(Expression::From(Box::new(from)))?);
1466 }
1467 FinishTask::Select(frame) => {
1468 let mut select = *frame.select;
1469
1470 if frame.qualify_present {
1471 if let Some(ref mut qualify) = select.qualify {
1472 qualify.this = transform_pop_result(&mut results)?;
1473 }
1474 }
1475 if frame.having_present {
1476 if let Some(ref mut having) = select.having {
1477 having.this = transform_pop_result(&mut results)?;
1478 }
1479 }
1480 if frame.group_by_count > 0 {
1481 if let Some(ref mut group_by) = select.group_by {
1482 group_by.expressions =
1483 transform_pop_results(&mut results, frame.group_by_count)?;
1484 }
1485 }
1486 if frame.where_present {
1487 if let Some(ref mut where_clause) = select.where_clause {
1488 where_clause.this = transform_pop_result(&mut results)?;
1489 }
1490 }
1491 if frame.from_present {
1492 match transform_pop_result(&mut results)? {
1493 Expression::From(from) => {
1494 select.from = Some(*from);
1495 }
1496 _ => {
1497 return Err(crate::error::Error::Internal(
1498 "expected FROM expression result".to_string(),
1499 ));
1500 }
1501 }
1502 }
1503 select.expressions = transform_pop_results(&mut results, frame.expr_count)?;
1504
1505 select.joins = select
1506 .joins
1507 .into_iter()
1508 .map(|mut join| {
1509 join.this = transform_recursive(join.this, transform_fn)?;
1510 if let Some(on) = join.on.take() {
1511 join.on = Some(transform_recursive(on, transform_fn)?);
1512 }
1513 match transform_fn(Expression::Join(Box::new(join)))? {
1514 Expression::Join(j) => Ok(*j),
1515 _ => Err(crate::error::Error::parse(
1516 "Join transformation returned non-join expression",
1517 0,
1518 0,
1519 0,
1520 0,
1521 )),
1522 }
1523 })
1524 .collect::<Result<Vec<_>>>()?;
1525
1526 select.lateral_views = select
1527 .lateral_views
1528 .into_iter()
1529 .map(|mut lv| {
1530 lv.this = transform_recursive(lv.this, transform_fn)?;
1531 Ok(lv)
1532 })
1533 .collect::<Result<Vec<_>>>()?;
1534
1535 if let Some(mut with) = select.with.take() {
1536 with.ctes = with
1537 .ctes
1538 .into_iter()
1539 .map(|mut cte| {
1540 let original = cte.this.clone();
1541 cte.this =
1542 transform_recursive(cte.this, transform_fn).unwrap_or(original);
1543 cte
1544 })
1545 .collect();
1546 select.with = Some(with);
1547 }
1548
1549 if let Some(mut order) = select.order_by.take() {
1550 order.expressions = order
1551 .expressions
1552 .into_iter()
1553 .map(|o| {
1554 let mut o = o;
1555 let original = o.this.clone();
1556 o.this =
1557 transform_recursive(o.this, transform_fn).unwrap_or(original);
1558 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1559 Ok(Expression::Ordered(transformed)) => *transformed,
1560 Ok(_) | Err(_) => o,
1561 }
1562 })
1563 .collect();
1564 select.order_by = Some(order);
1565 }
1566
1567 if let Some(ref mut windows) = select.windows {
1568 for nw in windows.iter_mut() {
1569 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1570 .into_iter()
1571 .map(|o| {
1572 let mut o = o;
1573 let original = o.this.clone();
1574 o.this = transform_recursive(o.this, transform_fn)
1575 .unwrap_or(original);
1576 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1577 Ok(Expression::Ordered(transformed)) => *transformed,
1578 Ok(_) | Err(_) => o,
1579 }
1580 })
1581 .collect();
1582 }
1583 }
1584
1585 results.push(transform_fn(Expression::Select(Box::new(select)))?);
1586 }
1587 FinishTask::SetOp(expr) => {
1588 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1589 let left = children.next().expect("left child");
1590 let right = children.next().expect("right child");
1591
1592 let rebuilt = match expr {
1593 Expression::Union(mut union) => {
1594 union.left = left;
1595 union.right = right;
1596 if let Some(mut with) = union.with.take() {
1597 with.ctes = with
1598 .ctes
1599 .into_iter()
1600 .map(|mut cte| {
1601 let original = cte.this.clone();
1602 cte.this = transform_recursive(cte.this, transform_fn)
1603 .unwrap_or(original);
1604 cte
1605 })
1606 .collect();
1607 union.with = Some(with);
1608 }
1609 Expression::Union(union)
1610 }
1611 Expression::Intersect(mut intersect) => {
1612 intersect.left = left;
1613 intersect.right = right;
1614 if let Some(mut with) = intersect.with.take() {
1615 with.ctes = with
1616 .ctes
1617 .into_iter()
1618 .map(|mut cte| {
1619 let original = cte.this.clone();
1620 cte.this = transform_recursive(cte.this, transform_fn)
1621 .unwrap_or(original);
1622 cte
1623 })
1624 .collect();
1625 intersect.with = Some(with);
1626 }
1627 Expression::Intersect(intersect)
1628 }
1629 Expression::Except(mut except) => {
1630 except.left = left;
1631 except.right = right;
1632 if let Some(mut with) = except.with.take() {
1633 with.ctes = with
1634 .ctes
1635 .into_iter()
1636 .map(|mut cte| {
1637 let original = cte.this.clone();
1638 cte.this = transform_recursive(cte.this, transform_fn)
1639 .unwrap_or(original);
1640 cte
1641 })
1642 .collect();
1643 except.with = Some(with);
1644 }
1645 Expression::Except(except)
1646 }
1647 _ => {
1648 return Err(crate::error::Error::Internal(
1649 "unexpected set-op transform task".to_string(),
1650 ));
1651 }
1652 };
1653 results.push(transform_fn(rebuilt)?);
1654 }
1655 },
1656 }
1657 }
1658
1659 match results.len() {
1660 1 => Ok(results.pop().expect("single transform result")),
1661 _ => Err(crate::error::Error::Internal(
1662 "unexpected transform result stack size".to_string(),
1663 )),
1664 }
1665}
1666
1667fn transform_table_ref_recursive<F>(table: TableRef, transform_fn: &F) -> Result<TableRef>
1668where
1669 F: Fn(Expression) -> Result<Expression>,
1670{
1671 match transform_recursive(Expression::Table(Box::new(table)), transform_fn)? {
1672 Expression::Table(table) => Ok(*table),
1673 _ => Err(crate::error::Error::parse(
1674 "TableRef transformation returned non-table expression",
1675 0,
1676 0,
1677 0,
1678 0,
1679 )),
1680 }
1681}
1682
1683fn transform_from_recursive<F>(from: From, transform_fn: &F) -> Result<From>
1684where
1685 F: Fn(Expression) -> Result<Expression>,
1686{
1687 match transform_recursive(Expression::From(Box::new(from)), transform_fn)? {
1688 Expression::From(from) => Ok(*from),
1689 _ => Err(crate::error::Error::parse(
1690 "FROM transformation returned non-FROM expression",
1691 0,
1692 0,
1693 0,
1694 0,
1695 )),
1696 }
1697}
1698
1699fn transform_join_recursive<F>(mut join: Join, transform_fn: &F) -> Result<Join>
1700where
1701 F: Fn(Expression) -> Result<Expression>,
1702{
1703 join.this = transform_recursive(join.this, transform_fn)?;
1704 if let Some(on) = join.on.take() {
1705 join.on = Some(transform_recursive(on, transform_fn)?);
1706 }
1707 if let Some(match_condition) = join.match_condition.take() {
1708 join.match_condition = Some(transform_recursive(match_condition, transform_fn)?);
1709 }
1710 join.pivots = join
1711 .pivots
1712 .into_iter()
1713 .map(|pivot| transform_recursive(pivot, transform_fn))
1714 .collect::<Result<Vec<_>>>()?;
1715
1716 match transform_fn(Expression::Join(Box::new(join)))? {
1717 Expression::Join(join) => Ok(*join),
1718 _ => Err(crate::error::Error::parse(
1719 "Join transformation returned non-join expression",
1720 0,
1721 0,
1722 0,
1723 0,
1724 )),
1725 }
1726}
1727
1728fn transform_output_clause_recursive<F>(
1729 mut output: OutputClause,
1730 transform_fn: &F,
1731) -> Result<OutputClause>
1732where
1733 F: Fn(Expression) -> Result<Expression>,
1734{
1735 output.columns = output
1736 .columns
1737 .into_iter()
1738 .map(|column| transform_recursive(column, transform_fn))
1739 .collect::<Result<Vec<_>>>()?;
1740 if let Some(into_table) = output.into_table.take() {
1741 output.into_table = Some(transform_recursive(into_table, transform_fn)?);
1742 }
1743 Ok(output)
1744}
1745
1746fn transform_with_recursive<F>(mut with: With, transform_fn: &F) -> Result<With>
1747where
1748 F: Fn(Expression) -> Result<Expression>,
1749{
1750 with.ctes = with
1751 .ctes
1752 .into_iter()
1753 .map(|mut cte| {
1754 cte.this = transform_recursive(cte.this, transform_fn)?;
1755 Ok(cte)
1756 })
1757 .collect::<Result<Vec<_>>>()?;
1758 if let Some(search) = with.search.take() {
1759 with.search = Some(Box::new(transform_recursive(*search, transform_fn)?));
1760 }
1761 Ok(with)
1762}
1763
1764fn transform_order_by_recursive<F>(mut order: OrderBy, transform_fn: &F) -> Result<OrderBy>
1765where
1766 F: Fn(Expression) -> Result<Expression>,
1767{
1768 order.expressions = order
1769 .expressions
1770 .into_iter()
1771 .map(|mut ordered| {
1772 let original = ordered.this.clone();
1773 ordered.this = transform_recursive(ordered.this, transform_fn).unwrap_or(original);
1774 match transform_fn(Expression::Ordered(Box::new(ordered.clone()))) {
1775 Ok(Expression::Ordered(transformed)) => Ok(*transformed),
1776 Ok(_) | Err(_) => Ok(ordered),
1777 }
1778 })
1779 .collect::<Result<Vec<_>>>()?;
1780 Ok(order)
1781}
1782
1783fn transform_recursive_reference<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
1784where
1785 F: Fn(Expression) -> Result<Expression>,
1786{
1787 use crate::expressions::BinaryOp;
1788
1789 // Helper macro to recurse into AggFunc-based expressions (this, filter, order_by, having_max, limit).
1790 macro_rules! recurse_agg {
1791 ($variant:ident, $f:expr) => {{
1792 let mut f = $f;
1793 f.this = transform_recursive(f.this, transform_fn)?;
1794 if let Some(filter) = f.filter.take() {
1795 f.filter = Some(transform_recursive(filter, transform_fn)?);
1796 }
1797 for ord in &mut f.order_by {
1798 ord.this = transform_recursive(
1799 std::mem::replace(&mut ord.this, Expression::Null(crate::expressions::Null)),
1800 transform_fn,
1801 )?;
1802 }
1803 if let Some((ref mut expr, _)) = f.having_max {
1804 *expr = Box::new(transform_recursive(
1805 std::mem::replace(expr.as_mut(), Expression::Null(crate::expressions::Null)),
1806 transform_fn,
1807 )?);
1808 }
1809 if let Some(limit) = f.limit.take() {
1810 f.limit = Some(Box::new(transform_recursive(*limit, transform_fn)?));
1811 }
1812 Expression::$variant(f)
1813 }};
1814 }
1815
1816 // Helper macro to transform binary ops with Box<BinaryOp>
1817 macro_rules! transform_binary {
1818 ($variant:ident, $op:expr) => {{
1819 let left = transform_recursive($op.left, transform_fn)?;
1820 let right = transform_recursive($op.right, transform_fn)?;
1821 Expression::$variant(Box::new(BinaryOp {
1822 left,
1823 right,
1824 left_comments: $op.left_comments,
1825 operator_comments: $op.operator_comments,
1826 trailing_comments: $op.trailing_comments,
1827 inferred_type: $op.inferred_type,
1828 }))
1829 }};
1830 }
1831
1832 // Fast path: leaf nodes never need child traversal, apply transform directly
1833 if matches!(
1834 &expr,
1835 Expression::Literal(_)
1836 | Expression::Boolean(_)
1837 | Expression::Null(_)
1838 | Expression::Identifier(_)
1839 | Expression::Star(_)
1840 | Expression::Parameter(_)
1841 | Expression::Placeholder(_)
1842 | Expression::SessionParameter(_)
1843 ) {
1844 return transform_fn(expr);
1845 }
1846
1847 // First recursively transform children, then apply the transform function
1848 let expr = match expr {
1849 Expression::Select(mut select) => {
1850 select.expressions = select
1851 .expressions
1852 .into_iter()
1853 .map(|e| transform_recursive(e, transform_fn))
1854 .collect::<Result<Vec<_>>>()?;
1855
1856 // Transform FROM clause
1857 if let Some(mut from) = select.from.take() {
1858 from.expressions = from
1859 .expressions
1860 .into_iter()
1861 .map(|e| transform_recursive(e, transform_fn))
1862 .collect::<Result<Vec<_>>>()?;
1863 select.from = Some(from);
1864 }
1865
1866 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
1867 select.joins = select
1868 .joins
1869 .into_iter()
1870 .map(|mut join| {
1871 join.this = transform_recursive(join.this, transform_fn)?;
1872 if let Some(on) = join.on.take() {
1873 join.on = Some(transform_recursive(on, transform_fn)?);
1874 }
1875 // Wrap join in Expression::Join to allow transform_fn to transform it
1876 match transform_fn(Expression::Join(Box::new(join)))? {
1877 Expression::Join(j) => Ok(*j),
1878 _ => Err(crate::error::Error::parse(
1879 "Join transformation returned non-join expression",
1880 0,
1881 0,
1882 0,
1883 0,
1884 )),
1885 }
1886 })
1887 .collect::<Result<Vec<_>>>()?;
1888
1889 // Transform LATERAL VIEW expressions (Hive/Spark)
1890 select.lateral_views = select
1891 .lateral_views
1892 .into_iter()
1893 .map(|mut lv| {
1894 lv.this = transform_recursive(lv.this, transform_fn)?;
1895 Ok(lv)
1896 })
1897 .collect::<Result<Vec<_>>>()?;
1898
1899 // Transform WHERE clause
1900 if let Some(mut where_clause) = select.where_clause.take() {
1901 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1902 select.where_clause = Some(where_clause);
1903 }
1904
1905 // Transform GROUP BY
1906 if let Some(mut group_by) = select.group_by.take() {
1907 group_by.expressions = group_by
1908 .expressions
1909 .into_iter()
1910 .map(|e| transform_recursive(e, transform_fn))
1911 .collect::<Result<Vec<_>>>()?;
1912 select.group_by = Some(group_by);
1913 }
1914
1915 // Transform HAVING
1916 if let Some(mut having) = select.having.take() {
1917 having.this = transform_recursive(having.this, transform_fn)?;
1918 select.having = Some(having);
1919 }
1920
1921 // Transform WITH (CTEs)
1922 if let Some(mut with) = select.with.take() {
1923 with.ctes = with
1924 .ctes
1925 .into_iter()
1926 .map(|mut cte| {
1927 let original = cte.this.clone();
1928 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1929 cte
1930 })
1931 .collect();
1932 select.with = Some(with);
1933 }
1934
1935 // Transform ORDER BY
1936 if let Some(mut order) = select.order_by.take() {
1937 order.expressions = order
1938 .expressions
1939 .into_iter()
1940 .map(|o| {
1941 let mut o = o;
1942 let original = o.this.clone();
1943 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1944 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
1945 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1946 Ok(Expression::Ordered(transformed)) => *transformed,
1947 Ok(_) | Err(_) => o,
1948 }
1949 })
1950 .collect();
1951 select.order_by = Some(order);
1952 }
1953
1954 // Transform WINDOW clause order_by
1955 if let Some(ref mut windows) = select.windows {
1956 for nw in windows.iter_mut() {
1957 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1958 .into_iter()
1959 .map(|o| {
1960 let mut o = o;
1961 let original = o.this.clone();
1962 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1963 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1964 Ok(Expression::Ordered(transformed)) => *transformed,
1965 Ok(_) | Err(_) => o,
1966 }
1967 })
1968 .collect();
1969 }
1970 }
1971
1972 // Transform QUALIFY
1973 if let Some(mut qual) = select.qualify.take() {
1974 qual.this = transform_recursive(qual.this, transform_fn)?;
1975 select.qualify = Some(qual);
1976 }
1977
1978 Expression::Select(select)
1979 }
1980 Expression::Function(mut f) => {
1981 f.args = f
1982 .args
1983 .into_iter()
1984 .map(|e| transform_recursive(e, transform_fn))
1985 .collect::<Result<Vec<_>>>()?;
1986 Expression::Function(f)
1987 }
1988 Expression::AggregateFunction(mut f) => {
1989 f.args = f
1990 .args
1991 .into_iter()
1992 .map(|e| transform_recursive(e, transform_fn))
1993 .collect::<Result<Vec<_>>>()?;
1994 if let Some(filter) = f.filter {
1995 f.filter = Some(transform_recursive(filter, transform_fn)?);
1996 }
1997 Expression::AggregateFunction(f)
1998 }
1999 Expression::WindowFunction(mut wf) => {
2000 wf.this = transform_recursive(wf.this, transform_fn)?;
2001 wf.over.partition_by = wf
2002 .over
2003 .partition_by
2004 .into_iter()
2005 .map(|e| transform_recursive(e, transform_fn))
2006 .collect::<Result<Vec<_>>>()?;
2007 // Transform order_by items through Expression::Ordered wrapper
2008 wf.over.order_by = wf
2009 .over
2010 .order_by
2011 .into_iter()
2012 .map(|o| {
2013 let mut o = o;
2014 o.this = transform_recursive(o.this, transform_fn)?;
2015 match transform_fn(Expression::Ordered(Box::new(o)))? {
2016 Expression::Ordered(transformed) => Ok(*transformed),
2017 _ => Err(crate::error::Error::parse(
2018 "Ordered transformation returned non-Ordered expression",
2019 0,
2020 0,
2021 0,
2022 0,
2023 )),
2024 }
2025 })
2026 .collect::<Result<Vec<_>>>()?;
2027 Expression::WindowFunction(wf)
2028 }
2029 Expression::Alias(mut a) => {
2030 a.this = transform_recursive(a.this, transform_fn)?;
2031 Expression::Alias(a)
2032 }
2033 Expression::Cast(mut c) => {
2034 c.this = transform_recursive(c.this, transform_fn)?;
2035 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
2036 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2037 Expression::Cast(c)
2038 }
2039 Expression::And(op) => transform_binary!(And, *op),
2040 Expression::Or(op) => transform_binary!(Or, *op),
2041 Expression::Add(op) => transform_binary!(Add, *op),
2042 Expression::Sub(op) => transform_binary!(Sub, *op),
2043 Expression::Mul(op) => transform_binary!(Mul, *op),
2044 Expression::Div(op) => transform_binary!(Div, *op),
2045 Expression::Eq(op) => transform_binary!(Eq, *op),
2046 Expression::Lt(op) => transform_binary!(Lt, *op),
2047 Expression::Gt(op) => transform_binary!(Gt, *op),
2048 Expression::Paren(mut p) => {
2049 p.this = transform_recursive(p.this, transform_fn)?;
2050 Expression::Paren(p)
2051 }
2052 Expression::Coalesce(mut f) => {
2053 f.expressions = f
2054 .expressions
2055 .into_iter()
2056 .map(|e| transform_recursive(e, transform_fn))
2057 .collect::<Result<Vec<_>>>()?;
2058 Expression::Coalesce(f)
2059 }
2060 Expression::IfNull(mut f) => {
2061 f.this = transform_recursive(f.this, transform_fn)?;
2062 f.expression = transform_recursive(f.expression, transform_fn)?;
2063 Expression::IfNull(f)
2064 }
2065 Expression::Nvl(mut f) => {
2066 f.this = transform_recursive(f.this, transform_fn)?;
2067 f.expression = transform_recursive(f.expression, transform_fn)?;
2068 Expression::Nvl(f)
2069 }
2070 Expression::In(mut i) => {
2071 i.this = transform_recursive(i.this, transform_fn)?;
2072 i.expressions = i
2073 .expressions
2074 .into_iter()
2075 .map(|e| transform_recursive(e, transform_fn))
2076 .collect::<Result<Vec<_>>>()?;
2077 if let Some(query) = i.query {
2078 i.query = Some(transform_recursive(query, transform_fn)?);
2079 }
2080 Expression::In(i)
2081 }
2082 Expression::Not(mut n) => {
2083 n.this = transform_recursive(n.this, transform_fn)?;
2084 Expression::Not(n)
2085 }
2086 Expression::ArraySlice(mut s) => {
2087 s.this = transform_recursive(s.this, transform_fn)?;
2088 if let Some(start) = s.start {
2089 s.start = Some(transform_recursive(start, transform_fn)?);
2090 }
2091 if let Some(end) = s.end {
2092 s.end = Some(transform_recursive(end, transform_fn)?);
2093 }
2094 Expression::ArraySlice(s)
2095 }
2096 Expression::Subscript(mut s) => {
2097 s.this = transform_recursive(s.this, transform_fn)?;
2098 s.index = transform_recursive(s.index, transform_fn)?;
2099 Expression::Subscript(s)
2100 }
2101 Expression::Array(mut a) => {
2102 a.expressions = a
2103 .expressions
2104 .into_iter()
2105 .map(|e| transform_recursive(e, transform_fn))
2106 .collect::<Result<Vec<_>>>()?;
2107 Expression::Array(a)
2108 }
2109 Expression::Struct(mut s) => {
2110 let mut new_fields = Vec::new();
2111 for (name, expr) in s.fields {
2112 let transformed = transform_recursive(expr, transform_fn)?;
2113 new_fields.push((name, transformed));
2114 }
2115 s.fields = new_fields;
2116 Expression::Struct(s)
2117 }
2118 Expression::NamedArgument(mut na) => {
2119 na.value = transform_recursive(na.value, transform_fn)?;
2120 Expression::NamedArgument(na)
2121 }
2122 Expression::MapFunc(mut m) => {
2123 m.keys = m
2124 .keys
2125 .into_iter()
2126 .map(|e| transform_recursive(e, transform_fn))
2127 .collect::<Result<Vec<_>>>()?;
2128 m.values = m
2129 .values
2130 .into_iter()
2131 .map(|e| transform_recursive(e, transform_fn))
2132 .collect::<Result<Vec<_>>>()?;
2133 Expression::MapFunc(m)
2134 }
2135 Expression::ArrayFunc(mut a) => {
2136 a.expressions = a
2137 .expressions
2138 .into_iter()
2139 .map(|e| transform_recursive(e, transform_fn))
2140 .collect::<Result<Vec<_>>>()?;
2141 Expression::ArrayFunc(a)
2142 }
2143 Expression::Lambda(mut l) => {
2144 l.body = transform_recursive(l.body, transform_fn)?;
2145 Expression::Lambda(l)
2146 }
2147 Expression::JsonExtract(mut f) => {
2148 f.this = transform_recursive(f.this, transform_fn)?;
2149 f.path = transform_recursive(f.path, transform_fn)?;
2150 Expression::JsonExtract(f)
2151 }
2152 Expression::JsonExtractScalar(mut f) => {
2153 f.this = transform_recursive(f.this, transform_fn)?;
2154 f.path = transform_recursive(f.path, transform_fn)?;
2155 Expression::JsonExtractScalar(f)
2156 }
2157
2158 // ===== UnaryFunc-based expressions =====
2159 // These all have a single `this: Expression` child
2160 Expression::Length(mut f) => {
2161 f.this = transform_recursive(f.this, transform_fn)?;
2162 Expression::Length(f)
2163 }
2164 Expression::Upper(mut f) => {
2165 f.this = transform_recursive(f.this, transform_fn)?;
2166 Expression::Upper(f)
2167 }
2168 Expression::Lower(mut f) => {
2169 f.this = transform_recursive(f.this, transform_fn)?;
2170 Expression::Lower(f)
2171 }
2172 Expression::LTrim(mut f) => {
2173 f.this = transform_recursive(f.this, transform_fn)?;
2174 Expression::LTrim(f)
2175 }
2176 Expression::RTrim(mut f) => {
2177 f.this = transform_recursive(f.this, transform_fn)?;
2178 Expression::RTrim(f)
2179 }
2180 Expression::Reverse(mut f) => {
2181 f.this = transform_recursive(f.this, transform_fn)?;
2182 Expression::Reverse(f)
2183 }
2184 Expression::Abs(mut f) => {
2185 f.this = transform_recursive(f.this, transform_fn)?;
2186 Expression::Abs(f)
2187 }
2188 Expression::Ceil(mut f) => {
2189 f.this = transform_recursive(f.this, transform_fn)?;
2190 Expression::Ceil(f)
2191 }
2192 Expression::Floor(mut f) => {
2193 f.this = transform_recursive(f.this, transform_fn)?;
2194 Expression::Floor(f)
2195 }
2196 Expression::Sign(mut f) => {
2197 f.this = transform_recursive(f.this, transform_fn)?;
2198 Expression::Sign(f)
2199 }
2200 Expression::Sqrt(mut f) => {
2201 f.this = transform_recursive(f.this, transform_fn)?;
2202 Expression::Sqrt(f)
2203 }
2204 Expression::Cbrt(mut f) => {
2205 f.this = transform_recursive(f.this, transform_fn)?;
2206 Expression::Cbrt(f)
2207 }
2208 Expression::Ln(mut f) => {
2209 f.this = transform_recursive(f.this, transform_fn)?;
2210 Expression::Ln(f)
2211 }
2212 Expression::Log(mut f) => {
2213 f.this = transform_recursive(f.this, transform_fn)?;
2214 if let Some(base) = f.base {
2215 f.base = Some(transform_recursive(base, transform_fn)?);
2216 }
2217 Expression::Log(f)
2218 }
2219 Expression::Exp(mut f) => {
2220 f.this = transform_recursive(f.this, transform_fn)?;
2221 Expression::Exp(f)
2222 }
2223 Expression::Date(mut f) => {
2224 f.this = transform_recursive(f.this, transform_fn)?;
2225 Expression::Date(f)
2226 }
2227 Expression::Stddev(f) => recurse_agg!(Stddev, f),
2228 Expression::StddevSamp(f) => recurse_agg!(StddevSamp, f),
2229 Expression::Variance(f) => recurse_agg!(Variance, f),
2230
2231 // ===== BinaryFunc-based expressions =====
2232 Expression::ModFunc(mut f) => {
2233 f.this = transform_recursive(f.this, transform_fn)?;
2234 f.expression = transform_recursive(f.expression, transform_fn)?;
2235 Expression::ModFunc(f)
2236 }
2237 Expression::Power(mut f) => {
2238 f.this = transform_recursive(f.this, transform_fn)?;
2239 f.expression = transform_recursive(f.expression, transform_fn)?;
2240 Expression::Power(f)
2241 }
2242 Expression::MapFromArrays(mut f) => {
2243 f.this = transform_recursive(f.this, transform_fn)?;
2244 f.expression = transform_recursive(f.expression, transform_fn)?;
2245 Expression::MapFromArrays(f)
2246 }
2247 Expression::ElementAt(mut f) => {
2248 f.this = transform_recursive(f.this, transform_fn)?;
2249 f.expression = transform_recursive(f.expression, transform_fn)?;
2250 Expression::ElementAt(f)
2251 }
2252 Expression::MapContainsKey(mut f) => {
2253 f.this = transform_recursive(f.this, transform_fn)?;
2254 f.expression = transform_recursive(f.expression, transform_fn)?;
2255 Expression::MapContainsKey(f)
2256 }
2257 Expression::Left(mut f) => {
2258 f.this = transform_recursive(f.this, transform_fn)?;
2259 f.length = transform_recursive(f.length, transform_fn)?;
2260 Expression::Left(f)
2261 }
2262 Expression::Right(mut f) => {
2263 f.this = transform_recursive(f.this, transform_fn)?;
2264 f.length = transform_recursive(f.length, transform_fn)?;
2265 Expression::Right(f)
2266 }
2267 Expression::Repeat(mut f) => {
2268 f.this = transform_recursive(f.this, transform_fn)?;
2269 f.times = transform_recursive(f.times, transform_fn)?;
2270 Expression::Repeat(f)
2271 }
2272
2273 // ===== Complex function expressions =====
2274 Expression::Substring(mut f) => {
2275 f.this = transform_recursive(f.this, transform_fn)?;
2276 f.start = transform_recursive(f.start, transform_fn)?;
2277 if let Some(len) = f.length {
2278 f.length = Some(transform_recursive(len, transform_fn)?);
2279 }
2280 Expression::Substring(f)
2281 }
2282 Expression::Replace(mut f) => {
2283 f.this = transform_recursive(f.this, transform_fn)?;
2284 f.old = transform_recursive(f.old, transform_fn)?;
2285 f.new = transform_recursive(f.new, transform_fn)?;
2286 Expression::Replace(f)
2287 }
2288 Expression::ConcatWs(mut f) => {
2289 f.separator = transform_recursive(f.separator, transform_fn)?;
2290 f.expressions = f
2291 .expressions
2292 .into_iter()
2293 .map(|e| transform_recursive(e, transform_fn))
2294 .collect::<Result<Vec<_>>>()?;
2295 Expression::ConcatWs(f)
2296 }
2297 Expression::Trim(mut f) => {
2298 f.this = transform_recursive(f.this, transform_fn)?;
2299 if let Some(chars) = f.characters {
2300 f.characters = Some(transform_recursive(chars, transform_fn)?);
2301 }
2302 Expression::Trim(f)
2303 }
2304 Expression::Split(mut f) => {
2305 f.this = transform_recursive(f.this, transform_fn)?;
2306 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
2307 Expression::Split(f)
2308 }
2309 Expression::Lpad(mut f) => {
2310 f.this = transform_recursive(f.this, transform_fn)?;
2311 f.length = transform_recursive(f.length, transform_fn)?;
2312 if let Some(fill) = f.fill {
2313 f.fill = Some(transform_recursive(fill, transform_fn)?);
2314 }
2315 Expression::Lpad(f)
2316 }
2317 Expression::Rpad(mut f) => {
2318 f.this = transform_recursive(f.this, transform_fn)?;
2319 f.length = transform_recursive(f.length, transform_fn)?;
2320 if let Some(fill) = f.fill {
2321 f.fill = Some(transform_recursive(fill, transform_fn)?);
2322 }
2323 Expression::Rpad(f)
2324 }
2325
2326 // ===== Conditional expressions =====
2327 Expression::Case(mut c) => {
2328 if let Some(operand) = c.operand {
2329 c.operand = Some(transform_recursive(operand, transform_fn)?);
2330 }
2331 c.whens = c
2332 .whens
2333 .into_iter()
2334 .map(|(cond, then)| {
2335 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
2336 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
2337 (new_cond, new_then)
2338 })
2339 .collect();
2340 if let Some(else_expr) = c.else_ {
2341 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
2342 }
2343 Expression::Case(c)
2344 }
2345 Expression::IfFunc(mut f) => {
2346 f.condition = transform_recursive(f.condition, transform_fn)?;
2347 f.true_value = transform_recursive(f.true_value, transform_fn)?;
2348 if let Some(false_val) = f.false_value {
2349 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
2350 }
2351 Expression::IfFunc(f)
2352 }
2353
2354 // ===== Date/Time expressions =====
2355 Expression::DateAdd(mut f) => {
2356 f.this = transform_recursive(f.this, transform_fn)?;
2357 f.interval = transform_recursive(f.interval, transform_fn)?;
2358 Expression::DateAdd(f)
2359 }
2360 Expression::DateSub(mut f) => {
2361 f.this = transform_recursive(f.this, transform_fn)?;
2362 f.interval = transform_recursive(f.interval, transform_fn)?;
2363 Expression::DateSub(f)
2364 }
2365 Expression::DateDiff(mut f) => {
2366 f.this = transform_recursive(f.this, transform_fn)?;
2367 f.expression = transform_recursive(f.expression, transform_fn)?;
2368 Expression::DateDiff(f)
2369 }
2370 Expression::DateTrunc(mut f) => {
2371 f.this = transform_recursive(f.this, transform_fn)?;
2372 Expression::DateTrunc(f)
2373 }
2374 Expression::Extract(mut f) => {
2375 f.this = transform_recursive(f.this, transform_fn)?;
2376 Expression::Extract(f)
2377 }
2378
2379 // ===== JSON expressions =====
2380 Expression::JsonObject(mut f) => {
2381 f.pairs = f
2382 .pairs
2383 .into_iter()
2384 .map(|(k, v)| {
2385 let new_k = transform_recursive(k, transform_fn)?;
2386 let new_v = transform_recursive(v, transform_fn)?;
2387 Ok((new_k, new_v))
2388 })
2389 .collect::<Result<Vec<_>>>()?;
2390 Expression::JsonObject(f)
2391 }
2392
2393 // ===== Subquery expressions =====
2394 Expression::Subquery(mut s) => {
2395 s.this = transform_recursive(s.this, transform_fn)?;
2396 Expression::Subquery(s)
2397 }
2398 Expression::Exists(mut e) => {
2399 e.this = transform_recursive(e.this, transform_fn)?;
2400 Expression::Exists(e)
2401 }
2402 Expression::Describe(mut d) => {
2403 d.target = transform_recursive(d.target, transform_fn)?;
2404 Expression::Describe(d)
2405 }
2406
2407 // ===== Set operations =====
2408 Expression::Union(mut u) => {
2409 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
2410 u.left = transform_recursive(left, transform_fn)?;
2411 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
2412 u.right = transform_recursive(right, transform_fn)?;
2413 if let Some(mut with) = u.with.take() {
2414 with.ctes = with
2415 .ctes
2416 .into_iter()
2417 .map(|mut cte| {
2418 let original = cte.this.clone();
2419 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2420 cte
2421 })
2422 .collect();
2423 u.with = Some(with);
2424 }
2425 Expression::Union(u)
2426 }
2427 Expression::Intersect(mut i) => {
2428 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
2429 i.left = transform_recursive(left, transform_fn)?;
2430 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
2431 i.right = transform_recursive(right, transform_fn)?;
2432 if let Some(mut with) = i.with.take() {
2433 with.ctes = with
2434 .ctes
2435 .into_iter()
2436 .map(|mut cte| {
2437 let original = cte.this.clone();
2438 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2439 cte
2440 })
2441 .collect();
2442 i.with = Some(with);
2443 }
2444 Expression::Intersect(i)
2445 }
2446 Expression::Except(mut e) => {
2447 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
2448 e.left = transform_recursive(left, transform_fn)?;
2449 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
2450 e.right = transform_recursive(right, transform_fn)?;
2451 if let Some(mut with) = e.with.take() {
2452 with.ctes = with
2453 .ctes
2454 .into_iter()
2455 .map(|mut cte| {
2456 let original = cte.this.clone();
2457 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2458 cte
2459 })
2460 .collect();
2461 e.with = Some(with);
2462 }
2463 Expression::Except(e)
2464 }
2465
2466 // ===== DML expressions =====
2467 Expression::Insert(mut ins) => {
2468 // Transform VALUES clause expressions
2469 let mut new_values = Vec::new();
2470 for row in ins.values {
2471 let mut new_row = Vec::new();
2472 for e in row {
2473 new_row.push(transform_recursive(e, transform_fn)?);
2474 }
2475 new_values.push(new_row);
2476 }
2477 ins.values = new_values;
2478
2479 // Transform query (for INSERT ... SELECT)
2480 if let Some(query) = ins.query {
2481 ins.query = Some(transform_recursive(query, transform_fn)?);
2482 }
2483
2484 // Transform RETURNING clause
2485 let mut new_returning = Vec::new();
2486 for e in ins.returning {
2487 new_returning.push(transform_recursive(e, transform_fn)?);
2488 }
2489 ins.returning = new_returning;
2490
2491 // Transform ON CONFLICT clause
2492 if let Some(on_conflict) = ins.on_conflict {
2493 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
2494 }
2495
2496 Expression::Insert(ins)
2497 }
2498 Expression::Update(mut upd) => {
2499 upd.table = transform_table_ref_recursive(upd.table, transform_fn)?;
2500 upd.extra_tables = upd
2501 .extra_tables
2502 .into_iter()
2503 .map(|table| transform_table_ref_recursive(table, transform_fn))
2504 .collect::<Result<Vec<_>>>()?;
2505 upd.table_joins = upd
2506 .table_joins
2507 .into_iter()
2508 .map(|join| transform_join_recursive(join, transform_fn))
2509 .collect::<Result<Vec<_>>>()?;
2510 upd.set = upd
2511 .set
2512 .into_iter()
2513 .map(|(id, val)| {
2514 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
2515 (id, new_val)
2516 })
2517 .collect();
2518 if let Some(from_clause) = upd.from_clause.take() {
2519 upd.from_clause = Some(transform_from_recursive(from_clause, transform_fn)?);
2520 }
2521 upd.from_joins = upd
2522 .from_joins
2523 .into_iter()
2524 .map(|join| transform_join_recursive(join, transform_fn))
2525 .collect::<Result<Vec<_>>>()?;
2526 if let Some(mut where_clause) = upd.where_clause.take() {
2527 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2528 upd.where_clause = Some(where_clause);
2529 }
2530 upd.returning = upd
2531 .returning
2532 .into_iter()
2533 .map(|expr| transform_recursive(expr, transform_fn))
2534 .collect::<Result<Vec<_>>>()?;
2535 if let Some(output) = upd.output.take() {
2536 upd.output = Some(transform_output_clause_recursive(output, transform_fn)?);
2537 }
2538 if let Some(with) = upd.with.take() {
2539 upd.with = Some(transform_with_recursive(with, transform_fn)?);
2540 }
2541 if let Some(limit) = upd.limit.take() {
2542 upd.limit = Some(transform_recursive(limit, transform_fn)?);
2543 }
2544 if let Some(order_by) = upd.order_by.take() {
2545 upd.order_by = Some(transform_order_by_recursive(order_by, transform_fn)?);
2546 }
2547 Expression::Update(upd)
2548 }
2549 Expression::Delete(mut del) => {
2550 del.table = transform_table_ref_recursive(del.table, transform_fn)?;
2551 del.using = del
2552 .using
2553 .into_iter()
2554 .map(|table| transform_table_ref_recursive(table, transform_fn))
2555 .collect::<Result<Vec<_>>>()?;
2556 if let Some(mut where_clause) = del.where_clause.take() {
2557 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2558 del.where_clause = Some(where_clause);
2559 }
2560 if let Some(output) = del.output.take() {
2561 del.output = Some(transform_output_clause_recursive(output, transform_fn)?);
2562 }
2563 if let Some(with) = del.with.take() {
2564 del.with = Some(transform_with_recursive(with, transform_fn)?);
2565 }
2566 if let Some(limit) = del.limit.take() {
2567 del.limit = Some(transform_recursive(limit, transform_fn)?);
2568 }
2569 if let Some(order_by) = del.order_by.take() {
2570 del.order_by = Some(transform_order_by_recursive(order_by, transform_fn)?);
2571 }
2572 del.returning = del
2573 .returning
2574 .into_iter()
2575 .map(|expr| transform_recursive(expr, transform_fn))
2576 .collect::<Result<Vec<_>>>()?;
2577 del.tables = del
2578 .tables
2579 .into_iter()
2580 .map(|table| transform_table_ref_recursive(table, transform_fn))
2581 .collect::<Result<Vec<_>>>()?;
2582 del.joins = del
2583 .joins
2584 .into_iter()
2585 .map(|join| transform_join_recursive(join, transform_fn))
2586 .collect::<Result<Vec<_>>>()?;
2587 Expression::Delete(del)
2588 }
2589
2590 // ===== CTE expressions =====
2591 Expression::With(mut w) => {
2592 w.ctes = w
2593 .ctes
2594 .into_iter()
2595 .map(|mut cte| {
2596 let original = cte.this.clone();
2597 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2598 cte
2599 })
2600 .collect();
2601 Expression::With(w)
2602 }
2603 Expression::Cte(mut c) => {
2604 c.this = transform_recursive(c.this, transform_fn)?;
2605 Expression::Cte(c)
2606 }
2607
2608 // ===== Order expressions =====
2609 Expression::Ordered(mut o) => {
2610 o.this = transform_recursive(o.this, transform_fn)?;
2611 Expression::Ordered(o)
2612 }
2613
2614 // ===== Negation =====
2615 Expression::Neg(mut n) => {
2616 n.this = transform_recursive(n.this, transform_fn)?;
2617 Expression::Neg(n)
2618 }
2619
2620 // ===== Between =====
2621 Expression::Between(mut b) => {
2622 b.this = transform_recursive(b.this, transform_fn)?;
2623 b.low = transform_recursive(b.low, transform_fn)?;
2624 b.high = transform_recursive(b.high, transform_fn)?;
2625 Expression::Between(b)
2626 }
2627 Expression::IsNull(mut i) => {
2628 i.this = transform_recursive(i.this, transform_fn)?;
2629 Expression::IsNull(i)
2630 }
2631 Expression::IsTrue(mut i) => {
2632 i.this = transform_recursive(i.this, transform_fn)?;
2633 Expression::IsTrue(i)
2634 }
2635 Expression::IsFalse(mut i) => {
2636 i.this = transform_recursive(i.this, transform_fn)?;
2637 Expression::IsFalse(i)
2638 }
2639
2640 // ===== Like expressions =====
2641 Expression::Like(mut l) => {
2642 l.left = transform_recursive(l.left, transform_fn)?;
2643 l.right = transform_recursive(l.right, transform_fn)?;
2644 Expression::Like(l)
2645 }
2646 Expression::ILike(mut l) => {
2647 l.left = transform_recursive(l.left, transform_fn)?;
2648 l.right = transform_recursive(l.right, transform_fn)?;
2649 Expression::ILike(l)
2650 }
2651
// ===== Additional binary ops (same transform_binary! handling as the operators above) =====
2653 Expression::Neq(op) => transform_binary!(Neq, *op),
2654 Expression::Lte(op) => transform_binary!(Lte, *op),
2655 Expression::Gte(op) => transform_binary!(Gte, *op),
2656 Expression::Mod(op) => transform_binary!(Mod, *op),
2657 Expression::Concat(op) => transform_binary!(Concat, *op),
2658 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
2659 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
2660 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
2661 Expression::Is(op) => transform_binary!(Is, *op),
2662
2663 // ===== TryCast / SafeCast =====
2664 Expression::TryCast(mut c) => {
2665 c.this = transform_recursive(c.this, transform_fn)?;
2666 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2667 Expression::TryCast(c)
2668 }
2669 Expression::SafeCast(mut c) => {
2670 c.this = transform_recursive(c.this, transform_fn)?;
2671 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2672 Expression::SafeCast(c)
2673 }
2674
2675 // ===== Misc =====
2676 Expression::Unnest(mut f) => {
2677 f.this = transform_recursive(f.this, transform_fn)?;
2678 f.expressions = f
2679 .expressions
2680 .into_iter()
2681 .map(|e| transform_recursive(e, transform_fn))
2682 .collect::<Result<Vec<_>>>()?;
2683 Expression::Unnest(f)
2684 }
2685 Expression::Explode(mut f) => {
2686 f.this = transform_recursive(f.this, transform_fn)?;
2687 Expression::Explode(f)
2688 }
2689 Expression::GroupConcat(mut f) => {
2690 f.this = transform_recursive(f.this, transform_fn)?;
2691 Expression::GroupConcat(f)
2692 }
2693 Expression::StringAgg(mut f) => {
2694 f.this = transform_recursive(f.this, transform_fn)?;
2695 Expression::StringAgg(f)
2696 }
2697 Expression::ListAgg(mut f) => {
2698 f.this = transform_recursive(f.this, transform_fn)?;
2699 Expression::ListAgg(f)
2700 }
2701 Expression::ArrayAgg(mut f) => {
2702 f.this = transform_recursive(f.this, transform_fn)?;
2703 Expression::ArrayAgg(f)
2704 }
2705 Expression::ParseJson(mut f) => {
2706 f.this = transform_recursive(f.this, transform_fn)?;
2707 Expression::ParseJson(f)
2708 }
2709 Expression::ToJson(mut f) => {
2710 f.this = transform_recursive(f.this, transform_fn)?;
2711 Expression::ToJson(f)
2712 }
2713 Expression::JSONExtract(mut e) => {
2714 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2715 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2716 Expression::JSONExtract(e)
2717 }
2718 Expression::JSONExtractScalar(mut e) => {
2719 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2720 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2721 Expression::JSONExtractScalar(e)
2722 }
2723
2724 // StrToTime: recurse into this
2725 Expression::StrToTime(mut e) => {
2726 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2727 Expression::StrToTime(e)
2728 }
2729
2730 // UnixToTime: recurse into this
2731 Expression::UnixToTime(mut e) => {
2732 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2733 Expression::UnixToTime(e)
2734 }
2735
2736 // CreateTable: recurse into column defaults, on_update expressions, and data types
2737 Expression::CreateTable(mut ct) => {
2738 for col in &mut ct.columns {
2739 if let Some(default_expr) = col.default.take() {
2740 col.default = Some(transform_recursive(default_expr, transform_fn)?);
2741 }
2742 if let Some(on_update_expr) = col.on_update.take() {
2743 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
2744 }
2745 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
2746 // are NOT applied here because per-dialect transforms are designed for CAST/expression
2747 // contexts and may not produce correct results for DDL column definitions.
2748 // The DDL type mappings would need dedicated handling per source/target pair.
2749 }
2750 if let Some(as_select) = ct.as_select.take() {
2751 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
2752 }
2753 Expression::CreateTable(ct)
2754 }
2755
2756 // CreateView: recurse into the view body query
2757 Expression::CreateView(mut cv) => {
2758 cv.query = transform_recursive(cv.query, transform_fn)?;
2759 Expression::CreateView(cv)
2760 }
2761
2762 // CreateTask: recurse into the task body
2763 Expression::CreateTask(mut ct) => {
2764 ct.body = transform_recursive(ct.body, transform_fn)?;
2765 Expression::CreateTask(ct)
2766 }
2767
2768 // CreateProcedure: recurse into body expressions
2769 Expression::CreateProcedure(mut cp) => {
2770 if let Some(body) = cp.body.take() {
2771 cp.body = Some(match body {
2772 FunctionBody::Expression(expr) => {
2773 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2774 }
2775 FunctionBody::Return(expr) => {
2776 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2777 }
2778 FunctionBody::Statements(stmts) => {
2779 let transformed_stmts = stmts
2780 .into_iter()
2781 .map(|s| transform_recursive(s, transform_fn))
2782 .collect::<Result<Vec<_>>>()?;
2783 FunctionBody::Statements(transformed_stmts)
2784 }
2785 other => other,
2786 });
2787 }
2788 Expression::CreateProcedure(cp)
2789 }
2790
2791 // CreateFunction: recurse into body expressions
2792 Expression::CreateFunction(mut cf) => {
2793 if let Some(body) = cf.body.take() {
2794 cf.body = Some(match body {
2795 FunctionBody::Expression(expr) => {
2796 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2797 }
2798 FunctionBody::Return(expr) => {
2799 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2800 }
2801 FunctionBody::Statements(stmts) => {
2802 let transformed_stmts = stmts
2803 .into_iter()
2804 .map(|s| transform_recursive(s, transform_fn))
2805 .collect::<Result<Vec<_>>>()?;
2806 FunctionBody::Statements(transformed_stmts)
2807 }
2808 other => other,
2809 });
2810 }
2811 Expression::CreateFunction(cf)
2812 }
2813
2814 // MemberOf: recurse into left and right operands
2815 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
2816 // ArrayContainsAll (@>): recurse into left and right operands
2817 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
2818 // ArrayContainedBy (<@): recurse into left and right operands
2819 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
2820 // ArrayOverlaps (&&): recurse into left and right operands
2821 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
2822 // TsMatch (@@): recurse into left and right operands
2823 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
2824 // Adjacent (-|-): recurse into left and right operands
2825 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
2826
2827 // Table: recurse into when (HistoricalData) and changes fields
2828 Expression::Table(mut t) => {
2829 if let Some(when) = t.when.take() {
2830 let transformed =
2831 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
2832 if let Expression::HistoricalData(hd) = transformed {
2833 t.when = Some(hd);
2834 }
2835 }
2836 if let Some(changes) = t.changes.take() {
2837 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
2838 if let Expression::Changes(c) = transformed {
2839 t.changes = Some(c);
2840 }
2841 }
2842 Expression::Table(t)
2843 }
2844
2845 // HistoricalData (Snowflake time travel): recurse into expression
2846 Expression::HistoricalData(mut hd) => {
2847 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
2848 Expression::HistoricalData(hd)
2849 }
2850
2851 // Changes (Snowflake CHANGES clause): recurse into at_before and end
2852 Expression::Changes(mut c) => {
2853 if let Some(at_before) = c.at_before.take() {
2854 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
2855 }
2856 if let Some(end) = c.end.take() {
2857 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
2858 }
2859 Expression::Changes(c)
2860 }
2861
2862 // TableArgument: TABLE(expr) or MODEL(expr)
2863 Expression::TableArgument(mut ta) => {
2864 ta.this = transform_recursive(ta.this, transform_fn)?;
2865 Expression::TableArgument(ta)
2866 }
2867
2868 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
2869 Expression::JoinedTable(mut jt) => {
2870 jt.left = transform_recursive(jt.left, transform_fn)?;
2871 for join in &mut jt.joins {
2872 join.this = transform_recursive(
2873 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
2874 transform_fn,
2875 )?;
2876 if let Some(on) = join.on.take() {
2877 join.on = Some(transform_recursive(on, transform_fn)?);
2878 }
2879 }
2880 jt.lateral_views = jt
2881 .lateral_views
2882 .into_iter()
2883 .map(|mut lv| {
2884 lv.this = transform_recursive(lv.this, transform_fn)?;
2885 Ok(lv)
2886 })
2887 .collect::<Result<Vec<_>>>()?;
2888 Expression::JoinedTable(jt)
2889 }
2890
2891 // Lateral: LATERAL func() - recurse into the function expression
2892 Expression::Lateral(mut lat) => {
2893 *lat.this = transform_recursive(*lat.this, transform_fn)?;
2894 Expression::Lateral(lat)
2895 }
2896
2897 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
2898 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
2899 // as a unit together with the WithinGroup wrapper
2900 Expression::WithinGroup(mut wg) => {
2901 wg.order_by = wg
2902 .order_by
2903 .into_iter()
2904 .map(|mut o| {
2905 let original = o.this.clone();
2906 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2907 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2908 Ok(Expression::Ordered(transformed)) => *transformed,
2909 Ok(_) | Err(_) => o,
2910 }
2911 })
2912 .collect();
2913 Expression::WithinGroup(wg)
2914 }
2915
2916 // Filter: recurse into both the aggregate and the filter condition
2917 Expression::Filter(mut f) => {
2918 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
2919 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
2920 Expression::Filter(f)
2921 }
2922
2923 // Aggregate functions (AggFunc-based): recurse into the aggregate argument,
2924 // filter, order_by, having_max, and limit.
2925 // Stddev, StddevSamp, Variance, and ArrayAgg are handled earlier in this match.
2926 Expression::Sum(f) => recurse_agg!(Sum, f),
2927 Expression::Avg(f) => recurse_agg!(Avg, f),
2928 Expression::Min(f) => recurse_agg!(Min, f),
2929 Expression::Max(f) => recurse_agg!(Max, f),
2930 Expression::CountIf(f) => recurse_agg!(CountIf, f),
2931 Expression::StddevPop(f) => recurse_agg!(StddevPop, f),
2932 Expression::VarPop(f) => recurse_agg!(VarPop, f),
2933 Expression::VarSamp(f) => recurse_agg!(VarSamp, f),
2934 Expression::Median(f) => recurse_agg!(Median, f),
2935 Expression::Mode(f) => recurse_agg!(Mode, f),
2936 Expression::First(f) => recurse_agg!(First, f),
2937 Expression::Last(f) => recurse_agg!(Last, f),
2938 Expression::AnyValue(f) => recurse_agg!(AnyValue, f),
2939 Expression::ApproxDistinct(f) => recurse_agg!(ApproxDistinct, f),
2940 Expression::ApproxCountDistinct(f) => recurse_agg!(ApproxCountDistinct, f),
2941 Expression::LogicalAnd(f) => recurse_agg!(LogicalAnd, f),
2942 Expression::LogicalOr(f) => recurse_agg!(LogicalOr, f),
2943 Expression::Skewness(f) => recurse_agg!(Skewness, f),
2944 Expression::ArrayConcatAgg(f) => recurse_agg!(ArrayConcatAgg, f),
2945 Expression::ArrayUniqueAgg(f) => recurse_agg!(ArrayUniqueAgg, f),
2946 Expression::BoolXorAgg(f) => recurse_agg!(BoolXorAgg, f),
2947 Expression::BitwiseOrAgg(f) => recurse_agg!(BitwiseOrAgg, f),
2948 Expression::BitwiseAndAgg(f) => recurse_agg!(BitwiseAndAgg, f),
2949 Expression::BitwiseXorAgg(f) => recurse_agg!(BitwiseXorAgg, f),
2950
2951 // Count has its own struct with an Option<Expression> `this` field
2952 Expression::Count(mut c) => {
2953 if let Some(this) = c.this.take() {
2954 c.this = Some(transform_recursive(this, transform_fn)?);
2955 }
2956 if let Some(filter) = c.filter.take() {
2957 c.filter = Some(transform_recursive(filter, transform_fn)?);
2958 }
2959 Expression::Count(c)
2960 }
2961
2962 Expression::PipeOperator(mut pipe) => {
2963 pipe.this = transform_recursive(pipe.this, transform_fn)?;
2964 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
2965 Expression::PipeOperator(pipe)
2966 }
2967
2968 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
2969 Expression::ArrayExcept(mut f) => {
2970 f.this = transform_recursive(f.this, transform_fn)?;
2971 f.expression = transform_recursive(f.expression, transform_fn)?;
2972 Expression::ArrayExcept(f)
2973 }
2974 Expression::ArrayContains(mut f) => {
2975 f.this = transform_recursive(f.this, transform_fn)?;
2976 f.expression = transform_recursive(f.expression, transform_fn)?;
2977 Expression::ArrayContains(f)
2978 }
2979 Expression::ArrayDistinct(mut f) => {
2980 f.this = transform_recursive(f.this, transform_fn)?;
2981 Expression::ArrayDistinct(f)
2982 }
2983 Expression::ArrayPosition(mut f) => {
2984 f.this = transform_recursive(f.this, transform_fn)?;
2985 f.expression = transform_recursive(f.expression, transform_fn)?;
2986 Expression::ArrayPosition(f)
2987 }
2988
2989 // Pass through leaf nodes unchanged
2990 other => other,
2991 };
2992
2993 // Then apply the transform function
2994 transform_fn(expr)
2995}
2996
// NOTE: `configs_for_dialect_type` (defined after the cached statics below) returns the
// tokenizer config, generator config, and expression transform closure for a built-in
// dialect type. It is the shared implementation used by both `Dialect::get()` and
// custom dialect construction.
3000// ---------------------------------------------------------------------------
3001// Cached dialect configurations
3002// ---------------------------------------------------------------------------
3003
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
///
/// Only the two configs are cached. Transform closures are cheap (unit-struct method
/// calls) and created fresh each time.
struct CachedDialectConfig {
    /// Tokenizer settings; cloned out of the cache for each caller.
    tokenizer_config: TokenizerConfig,
    /// Generator settings behind an `Arc`, so handing them out only clones the `Arc`.
    generator_config: Arc<GeneratorConfig>,
}
3010
/// Declare a feature-gated `LazyLock<CachedDialectConfig>` static for one dialect.
///
/// - `$static_name`: identifier of the generated static.
/// - `$dialect_struct`: expression yielding the dialect value whose `tokenizer_config()`
///   and `generator_config()` results are captured on first access.
/// - `$feature`: Cargo feature name; the static only exists when that feature is enabled.
macro_rules! cached_dialect {
    ($static_name:ident, $dialect_struct:expr, $feature:literal) => {
        #[cfg(feature = $feature)]
        static $static_name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let dialect = $dialect_struct;
            let tokenizer_config = dialect.tokenizer_config();
            let generator_config = Arc::new(dialect.generator_config());
            CachedDialectConfig {
                tokenizer_config,
                generator_config,
            }
        });
    };
}
3024
3025static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
3026 let d = GenericDialect;
3027 CachedDialectConfig {
3028 tokenizer_config: d.tokenizer_config(),
3029 generator_config: Arc::new(d.generator_config()),
3030 }
3031});
3032
// One lazily-initialized config cache per feature-gated built-in dialect. Each static
// is only compiled when the Cargo feature named in the third argument is enabled.
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(
    CACHED_MATERIALIZE,
    MaterializeDialect,
    "dialect-materialize"
);
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(
    CACHED_SINGLESTORE,
    SingleStoreDialect,
    "dialect-singlestore"
);
cached_dialect!(
    CACHED_COCKROACHDB,
    CockroachDBDialect,
    "dialect-cockroachdb"
);
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
3078
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type.
///
/// The two configs are cloned out of the dialect's cached static (the generator config
/// is an `Arc` clone); the transform closure is created fresh on every call and forwards
/// to the dialect's `transform_expr`.
///
/// Any dialect type without an enabled arm below — `DialectType::Generic`, or a dialect
/// whose Cargo feature is disabled — falls back to the generic dialect's configuration
/// and transform.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    Arc<GeneratorConfig>,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    /// Clone configs from a cached static and pair with a fresh transform closure.
    macro_rules! from_cache {
        ($cache:expr, $dialect_struct:expr) => {{
            // Dereferencing the `LazyLock` initializes the cache on first use.
            let c = &*$cache;
            (
                c.tokenizer_config.clone(),
                c.generator_config.clone(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    // Each arm is compiled only when the matching dialect feature is enabled.
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
        // Fallback: generic dialect for every type without an enabled arm above.
        _ => from_cache!(CACHED_GENERIC, GenericDialect),
    }
}
3167
3168// ---------------------------------------------------------------------------
3169// Custom dialect registry
3170// ---------------------------------------------------------------------------
3171
/// Global registry of custom dialects, keyed by dialect name.
///
/// Entries are stored behind `Arc`, so lookups hand out shared handles while the
/// `RwLock` is held only for the duration of the map access.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
3174
/// Fully resolved configuration of a registered custom dialect.
///
/// Produced by `CustomDialectBuilder::register` and stored in the global registry.
struct CustomDialectConfig {
    /// Name the dialect is registered under (the registry key).
    name: String,
    /// Built-in dialect whose configuration this custom dialect started from.
    base_dialect: DialectType,
    /// Base tokenizer config with the builder's tokenizer modifier (if any) applied.
    tokenizer_config: TokenizerConfig,
    /// Base generator config with the builder's generator modifier (if any) applied.
    generator_config: GeneratorConfig,
    /// Optional per-node transform; when `None`, the base dialect's transform is used.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess hook installed on dialects built from this config.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3183
/// Fluent builder for creating and registering custom SQL dialects.
///
/// A custom dialect is based on an existing built-in dialect and allows selective
/// overrides of tokenizer configuration, generator configuration, and expression
/// transforms.
///
/// # Example
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
/// use polyglot_sql::generator::NormalizeFunctions;
///
/// CustomDialectBuilder::new("my_postgres")
///     .based_on(DialectType::PostgreSQL)
///     .generator_config_modifier(|gc| {
///         gc.normalize_functions = NormalizeFunctions::Lower;
///     })
///     .register()
///     .unwrap();
///
/// let d = Dialect::get_by_name("my_postgres").unwrap();
/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
/// let sql = d.generate(&exprs[0]).unwrap();
/// assert_eq!(sql, "select count(*)");
///
/// polyglot_sql::unregister_custom_dialect("my_postgres");
/// ```
pub struct CustomDialectBuilder {
    /// Name the dialect will be registered under.
    name: String,
    /// Built-in dialect to inherit configuration from (`Generic` unless overridden).
    base_dialect: DialectType,
    /// One-shot edit applied to the inherited tokenizer config during `register`.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot edit applied to the inherited generator config during `register`.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node transform that replaces the base dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess function that replaces the built-in preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3219
3220impl CustomDialectBuilder {
3221 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
3222 pub fn new(name: impl Into<String>) -> Self {
3223 Self {
3224 name: name.into(),
3225 base_dialect: DialectType::Generic,
3226 tokenizer_modifier: None,
3227 generator_modifier: None,
3228 transform: None,
3229 preprocess: None,
3230 }
3231 }
3232
3233 /// Set the base built-in dialect to inherit configuration from.
3234 pub fn based_on(mut self, dialect: DialectType) -> Self {
3235 self.base_dialect = dialect;
3236 self
3237 }
3238
3239 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
3240 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
3241 where
3242 F: FnOnce(&mut TokenizerConfig) + 'static,
3243 {
3244 self.tokenizer_modifier = Some(Box::new(f));
3245 self
3246 }
3247
3248 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
3249 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
3250 where
3251 F: FnOnce(&mut GeneratorConfig) + 'static,
3252 {
3253 self.generator_modifier = Some(Box::new(f));
3254 self
3255 }
3256
3257 /// Set a custom per-node expression transform function.
3258 ///
3259 /// This replaces the base dialect's transform. It is called on every expression
3260 /// node during the recursive transform pass.
3261 pub fn transform_fn<F>(mut self, f: F) -> Self
3262 where
3263 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3264 {
3265 self.transform = Some(Arc::new(f));
3266 self
3267 }
3268
3269 /// Set a custom whole-tree preprocessing function.
3270 ///
3271 /// This replaces the base dialect's built-in preprocessing. It is called once
3272 /// on the entire expression tree before the recursive per-node transform.
3273 pub fn preprocess_fn<F>(mut self, f: F) -> Self
3274 where
3275 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3276 {
3277 self.preprocess = Some(Arc::new(f));
3278 self
3279 }
3280
3281 /// Build the custom dialect configuration and register it in the global registry.
3282 ///
3283 /// Returns an error if:
3284 /// - The name collides with a built-in dialect name
3285 /// - A custom dialect with the same name is already registered
3286 pub fn register(self) -> Result<()> {
3287 // Reject names that collide with built-in dialects
3288 if DialectType::from_str(&self.name).is_ok() {
3289 return Err(crate::error::Error::parse(
3290 format!(
3291 "Cannot register custom dialect '{}': name collides with built-in dialect",
3292 self.name
3293 ),
3294 0,
3295 0,
3296 0,
3297 0,
3298 ));
3299 }
3300
3301 // Get base configs
3302 let (mut tok_config, arc_gen_config, _base_transform) =
3303 configs_for_dialect_type(self.base_dialect);
3304 let mut gen_config = (*arc_gen_config).clone();
3305
3306 // Apply modifiers
3307 if let Some(tok_mod) = self.tokenizer_modifier {
3308 tok_mod(&mut tok_config);
3309 }
3310 if let Some(gen_mod) = self.generator_modifier {
3311 gen_mod(&mut gen_config);
3312 }
3313
3314 let config = CustomDialectConfig {
3315 name: self.name.clone(),
3316 base_dialect: self.base_dialect,
3317 tokenizer_config: tok_config,
3318 generator_config: gen_config,
3319 transform: self.transform,
3320 preprocess: self.preprocess,
3321 };
3322
3323 register_custom_dialect(config)
3324 }
3325}
3326
3327use std::str::FromStr;
3328
3329fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
3330 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
3331 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
3332 })?;
3333
3334 if registry.contains_key(&config.name) {
3335 return Err(crate::error::Error::parse(
3336 format!("Custom dialect '{}' is already registered", config.name),
3337 0,
3338 0,
3339 0,
3340 0,
3341 ));
3342 }
3343
3344 registry.insert(config.name.clone(), Arc::new(config));
3345 Ok(())
3346}
3347
3348/// Remove a custom dialect from the global registry.
3349///
3350/// Returns `true` if a dialect with that name was found and removed,
3351/// `false` if no such custom dialect existed.
3352pub fn unregister_custom_dialect(name: &str) -> bool {
3353 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
3354 registry.remove(name).is_some()
3355 } else {
3356 false
3357 }
3358}
3359
3360fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
3361 CUSTOM_DIALECT_REGISTRY
3362 .read()
3363 .ok()
3364 .and_then(|registry| registry.get(name).cloned())
3365}
3366
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Built-in dialect type; for custom dialects this is the base dialect.
    dialect_type: DialectType,
    /// Tokenizer built from this dialect's tokenizer configuration.
    tokenizer: Tokenizer,
    /// Default generator configuration, shared via `Arc`.
    generator_config: Arc<GeneratorConfig>,
    /// Per-node expression transform applied during the recursive transform pass.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3399
/// Options for [`Dialect::transpile_with`].
///
/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
/// The struct is marked `#[non_exhaustive]` so new fields can be added without
/// breaking the API.
///
/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
/// Because of `#[serde(default)]`, fields missing from the input take their
/// default values when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", default)]
#[non_exhaustive]
pub struct TranspileOptions {
    /// Whether to pretty-print the output SQL. Defaults to `false`.
    pub pretty: bool,
}
3415
3416impl TranspileOptions {
3417 /// Construct options with pretty-printing enabled.
3418 pub fn pretty() -> Self {
3419 Self { pretty: true }
3420 }
3421}
3422
/// A value that can be used as the target dialect in [`Dialect::transpile`] /
/// [`Dialect::transpile_with`].
///
/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
/// dialect handle, including custom ones). End users do not normally need to
/// implement this trait themselves.
pub trait TranspileTarget {
    /// Invoke `f` with a reference to the resolved target dialect and return
    /// whatever `f` returns.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
}
3433
3434impl TranspileTarget for DialectType {
3435 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
3436 f(&Dialect::get(self))
3437 }
3438}
3439
impl TranspileTarget for &Dialect {
    /// Passes the borrowed dialect straight to `f`; nothing is constructed or cloned.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        f(self)
    }
}
3445
3446impl Dialect {
3447 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
3448 ///
3449 /// This is the primary constructor. It initializes the tokenizer, generator config,
3450 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
3451 /// For hybrid dialects like Athena, it also sets up expression-specific generator
3452 /// config routing.
3453 pub fn get(dialect_type: DialectType) -> Self {
3454 let (tokenizer_config, generator_config, transformer) =
3455 configs_for_dialect_type(dialect_type);
3456
3457 // Set up expression-specific generator config for hybrid dialects
3458 let generator_config_for_expr: Option<
3459 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
3460 > = match dialect_type {
3461 #[cfg(feature = "dialect-athena")]
3462 DialectType::Athena => Some(Box::new(|expr| {
3463 AthenaDialect.generator_config_for_expr(expr)
3464 })),
3465 _ => None,
3466 };
3467
3468 Self {
3469 dialect_type,
3470 tokenizer: Tokenizer::new(tokenizer_config),
3471 generator_config,
3472 transformer,
3473 generator_config_for_expr,
3474 custom_preprocess: None,
3475 }
3476 }
3477
3478 /// Look up a dialect by string name.
3479 ///
3480 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
3481 /// falls back to the custom dialect registry. Returns `None` if no dialect
3482 /// with the given name exists.
3483 pub fn get_by_name(name: &str) -> Option<Self> {
3484 // Try built-in first
3485 if let Ok(dt) = DialectType::from_str(name) {
3486 return Some(Self::get(dt));
3487 }
3488
3489 // Try custom registry
3490 let config = get_custom_dialect_config(name)?;
3491 Some(Self::from_custom_config(&config))
3492 }
3493
3494 /// Construct a `Dialect` from a custom dialect configuration.
3495 fn from_custom_config(config: &CustomDialectConfig) -> Self {
3496 // Build the transformer: use custom if provided, else use base dialect's
3497 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
3498 if let Some(ref custom_transform) = config.transform {
3499 let t = Arc::clone(custom_transform);
3500 Box::new(move |e| t(e))
3501 } else {
3502 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
3503 base_transform
3504 };
3505
3506 // Build the custom preprocess: use custom if provided
3507 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
3508 config.preprocess.as_ref().map(|p| {
3509 let p = Arc::clone(p);
3510 Box::new(move |e: Expression| p(e))
3511 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
3512 });
3513
3514 Self {
3515 dialect_type: config.base_dialect,
3516 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
3517 generator_config: Arc::new(config.generator_config.clone()),
3518 transformer,
3519 generator_config_for_expr: None,
3520 custom_preprocess,
3521 }
3522 }
3523
    /// Get the dialect type.
    ///
    /// For a dialect built from a custom configuration this is the base built-in
    /// dialect it was derived from.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
3528
3529 /// Get the generator configuration
3530 pub fn generator_config(&self) -> &GeneratorConfig {
3531 &self.generator_config
3532 }
3533
3534 /// Parses a SQL string into a list of [`Expression`] AST nodes.
3535 ///
3536 /// The input may contain multiple semicolon-separated statements; each one
3537 /// produces a separate element in the returned vector. Tokenization uses
3538 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
3539 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
3540 let tokens = self.tokenizer.tokenize(sql)?;
3541 let config = crate::parser::ParserConfig {
3542 dialect: Some(self.dialect_type),
3543 ..Default::default()
3544 };
3545 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3546 parser.parse()
3547 }
3548
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Returns the token list without parsing it; tokenizer errors are propagated.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
3553
3554 /// Get the generator config for a specific expression (supports hybrid dialects).
3555 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
3556 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
3557 if let Some(ref config_fn) = self.generator_config_for_expr {
3558 config_fn(expr)
3559 } else {
3560 (*self.generator_config).clone()
3561 }
3562 }
3563
3564 /// Generates a SQL string from an [`Expression`] AST node.
3565 ///
3566 /// The output uses this dialect's generator configuration for identifier quoting,
3567 /// keyword casing, function name normalization, and syntax style. The result is
3568 /// a single-line (non-pretty) SQL string.
3569 pub fn generate(&self, expr: &Expression) -> Result<String> {
3570 // Fast path: when no per-expression config override, share the Arc cheaply.
3571 if self.generator_config_for_expr.is_none() {
3572 let mut generator = Generator::with_arc_config(self.generator_config.clone());
3573 return generator.generate(expr);
3574 }
3575 let config = self.get_config_for_expr(expr);
3576 let mut generator = Generator::with_config(config);
3577 generator.generate(expr)
3578 }
3579
3580 /// Generate SQL from an expression with pretty printing enabled
3581 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
3582 let mut config = self.get_config_for_expr(expr);
3583 config.pretty = true;
3584 let mut generator = Generator::with_config(config);
3585 generator.generate(expr)
3586 }
3587
3588 /// Generate SQL from an expression with source dialect info (for transpilation)
3589 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
3590 let mut config = self.get_config_for_expr(expr);
3591 config.source_dialect = Some(source);
3592 let mut generator = Generator::with_config(config);
3593 generator.generate(expr)
3594 }
3595
3596 /// Generate SQL from an expression with pretty printing and source dialect info
3597 pub fn generate_pretty_with_source(
3598 &self,
3599 expr: &Expression,
3600 source: DialectType,
3601 ) -> Result<String> {
3602 let mut config = self.get_config_for_expr(expr);
3603 config.pretty = true;
3604 config.source_dialect = Some(source);
3605 let mut generator = Generator::with_config(config);
3606 generator.generate(expr)
3607 }
3608
3609 /// Generate SQL from an expression with forced identifier quoting (identify=True)
3610 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
3611 let mut config = self.get_config_for_expr(expr);
3612 config.always_quote_identifiers = true;
3613 let mut generator = Generator::with_config(config);
3614 generator.generate(expr)
3615 }
3616
3617 /// Generate SQL from an expression with pretty printing and forced identifier quoting
3618 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
3619 let mut config = (*self.generator_config).clone();
3620 config.pretty = true;
3621 config.always_quote_identifiers = true;
3622 let mut generator = Generator::with_config(config);
3623 generator.generate(expr)
3624 }
3625
3626 /// Generate SQL from an expression with caller-specified config overrides
3627 pub fn generate_with_overrides(
3628 &self,
3629 expr: &Expression,
3630 overrides: impl FnOnce(&mut GeneratorConfig),
3631 ) -> Result<String> {
3632 let mut config = self.get_config_for_expr(expr);
3633 overrides(&mut config);
3634 let mut generator = Generator::with_config(config);
3635 generator.generate(expr)
3636 }
3637
3638 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
3639 ///
3640 /// The transformation proceeds in two phases:
3641 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
3642 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
3643 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
3644 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
3645 ///
3646 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
3647 /// and for identity transforms (normalizing SQL within the same dialect).
3648 pub fn transform(&self, expr: Expression) -> Result<Expression> {
3649 // Apply preprocessing transforms based on dialect
3650 let preprocessed = self.preprocess(expr)?;
3651 // Then apply recursive transformation
3652 transform_recursive(preprocessed, &self.transformer)
3653 }
3654
3655 /// Apply dialect-specific preprocessing transforms
3656 fn preprocess(&self, expr: Expression) -> Result<Expression> {
3657 // If a custom preprocess function is set, use it instead of the built-in logic
3658 if let Some(ref custom_preprocess) = self.custom_preprocess {
3659 return custom_preprocess(expr);
3660 }
3661
3662 #[cfg(any(
3663 feature = "dialect-mysql",
3664 feature = "dialect-postgresql",
3665 feature = "dialect-bigquery",
3666 feature = "dialect-snowflake",
3667 feature = "dialect-tsql",
3668 feature = "dialect-spark",
3669 feature = "dialect-databricks",
3670 feature = "dialect-hive",
3671 feature = "dialect-sqlite",
3672 feature = "dialect-trino",
3673 feature = "dialect-presto",
3674 feature = "dialect-duckdb",
3675 feature = "dialect-redshift",
3676 feature = "dialect-starrocks",
3677 feature = "dialect-oracle",
3678 feature = "dialect-clickhouse",
3679 ))]
3680 use crate::transforms;
3681
3682 match self.dialect_type {
3683 // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
3684 // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
3685 #[cfg(feature = "dialect-mysql")]
3686 DialectType::MySQL => {
3687 let expr = transforms::eliminate_qualify(expr)?;
3688 let expr = transforms::eliminate_full_outer_join(expr)?;
3689 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3690 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3691 Ok(expr)
3692 }
3693 // PostgreSQL doesn't support QUALIFY
3694 // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
3695 // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
3696 #[cfg(feature = "dialect-postgresql")]
3697 DialectType::PostgreSQL => {
3698 let expr = transforms::eliminate_qualify(expr)?;
3699 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3700 let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
3701 // Normalize SET ... TO to SET ... = in CREATE FUNCTION
3702 // Only normalize when sqlglot would fully parse (no body) —
3703 // sqlglot falls back to Command for complex function bodies,
3704 // preserving the original text including TO.
3705 let expr = if let Expression::CreateFunction(mut cf) = expr {
3706 if cf.body.is_none() {
3707 for opt in &mut cf.set_options {
3708 if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
3709 &mut opt.value
3710 {
3711 *use_to = false;
3712 }
3713 }
3714 }
3715 Expression::CreateFunction(cf)
3716 } else {
3717 expr
3718 };
3719 Ok(expr)
3720 }
3721 // BigQuery doesn't support DISTINCT ON or CTE column aliases
3722 #[cfg(feature = "dialect-bigquery")]
3723 DialectType::BigQuery => {
3724 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3725 let expr = transforms::pushdown_cte_column_names(expr)?;
3726 let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
3727 Ok(expr)
3728 }
3729 // Snowflake
3730 #[cfg(feature = "dialect-snowflake")]
3731 DialectType::Snowflake => {
3732 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3733 let expr = transforms::eliminate_window_clause(expr)?;
3734 let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
3735 Ok(expr)
3736 }
3737 // TSQL doesn't support QUALIFY
3738 // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
3739 // TSQL doesn't support CTEs in subqueries (hoist to top level)
3740 // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
3741 // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
3742 #[cfg(feature = "dialect-tsql")]
3743 DialectType::TSQL => {
3744 let expr = transforms::eliminate_qualify(expr)?;
3745 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3746 let expr = transforms::ensure_bools(expr)?;
3747 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3748 let expr = transforms::move_ctes_to_top_level(expr)?;
3749 let expr = transforms::qualify_derived_table_outputs(expr)?;
3750 Ok(expr)
3751 }
3752 // Spark doesn't support QUALIFY (but Databricks does)
3753 // Spark doesn't support CTEs in subqueries (hoist to top level)
3754 #[cfg(feature = "dialect-spark")]
3755 DialectType::Spark => {
3756 let expr = transforms::eliminate_qualify(expr)?;
3757 let expr = transforms::add_auto_table_alias(expr)?;
3758 let expr = transforms::simplify_nested_paren_values(expr)?;
3759 let expr = transforms::move_ctes_to_top_level(expr)?;
3760 Ok(expr)
3761 }
3762 // Databricks supports QUALIFY natively
3763 // Databricks doesn't support CTEs in subqueries (hoist to top level)
3764 #[cfg(feature = "dialect-databricks")]
3765 DialectType::Databricks => {
3766 let expr = transforms::add_auto_table_alias(expr)?;
3767 let expr = transforms::simplify_nested_paren_values(expr)?;
3768 let expr = transforms::move_ctes_to_top_level(expr)?;
3769 Ok(expr)
3770 }
3771 // Hive doesn't support QUALIFY or CTEs in subqueries
3772 #[cfg(feature = "dialect-hive")]
3773 DialectType::Hive => {
3774 let expr = transforms::eliminate_qualify(expr)?;
3775 let expr = transforms::move_ctes_to_top_level(expr)?;
3776 Ok(expr)
3777 }
3778 // SQLite doesn't support QUALIFY
3779 #[cfg(feature = "dialect-sqlite")]
3780 DialectType::SQLite => {
3781 let expr = transforms::eliminate_qualify(expr)?;
3782 Ok(expr)
3783 }
3784 // Trino doesn't support QUALIFY
3785 #[cfg(feature = "dialect-trino")]
3786 DialectType::Trino => {
3787 let expr = transforms::eliminate_qualify(expr)?;
3788 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
3789 Ok(expr)
3790 }
3791 // Presto doesn't support QUALIFY or WINDOW clause
3792 #[cfg(feature = "dialect-presto")]
3793 DialectType::Presto => {
3794 let expr = transforms::eliminate_qualify(expr)?;
3795 let expr = transforms::eliminate_window_clause(expr)?;
3796 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
3797 Ok(expr)
3798 }
3799 // DuckDB supports QUALIFY - no elimination needed
3800 // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
3801 // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
3802 #[cfg(feature = "dialect-duckdb")]
3803 DialectType::DuckDB => {
3804 let expr = transforms::expand_posexplode_duckdb(expr)?;
3805 let expr = transforms::expand_like_any(expr)?;
3806 Ok(expr)
3807 }
3808 // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
3809 #[cfg(feature = "dialect-redshift")]
3810 DialectType::Redshift => {
3811 let expr = transforms::eliminate_qualify(expr)?;
3812 let expr = transforms::eliminate_window_clause(expr)?;
3813 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3814 Ok(expr)
3815 }
3816 // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
3817 #[cfg(feature = "dialect-starrocks")]
3818 DialectType::StarRocks => {
3819 let expr = transforms::eliminate_qualify(expr)?;
3820 let expr = transforms::expand_between_in_delete(expr)?;
3821 let expr = transforms::eliminate_distinct_on_for_dialect(
3822 expr,
3823 Some(DialectType::StarRocks),
3824 Some(DialectType::StarRocks),
3825 )?;
3826 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3827 Ok(expr)
3828 }
3829 // DataFusion supports QUALIFY and semi/anti joins natively
3830 #[cfg(feature = "dialect-datafusion")]
3831 DialectType::DataFusion => Ok(expr),
3832 // Oracle doesn't support QUALIFY
3833 #[cfg(feature = "dialect-oracle")]
3834 DialectType::Oracle => {
3835 let expr = transforms::eliminate_qualify(expr)?;
3836 Ok(expr)
3837 }
3838 // Drill - no special preprocessing needed
3839 #[cfg(feature = "dialect-drill")]
3840 DialectType::Drill => Ok(expr),
3841 // Teradata - no special preprocessing needed
3842 #[cfg(feature = "dialect-teradata")]
3843 DialectType::Teradata => Ok(expr),
3844 // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
3845 #[cfg(feature = "dialect-clickhouse")]
3846 DialectType::ClickHouse => {
3847 let expr = transforms::no_limit_order_by_union(expr)?;
3848 Ok(expr)
3849 }
3850 // Other dialects - no preprocessing
3851 _ => Ok(expr),
3852 }
3853 }
3854
3855 /// Transpile SQL from this dialect to the given target dialect.
3856 ///
3857 /// The target may be specified as either a built-in [`DialectType`] enum variant
3858 /// or as a reference to a [`Dialect`] handle (built-in or custom). Both work:
3859 ///
3860 /// ```rust,ignore
3861 /// let pg = Dialect::get(DialectType::PostgreSQL);
3862 /// pg.transpile("SELECT NOW()", DialectType::BigQuery)?; // enum
3863 /// pg.transpile("SELECT NOW()", &custom_dialect)?; // handle
3864 /// ```
3865 ///
3866 /// For pretty-printing or other options, use [`transpile_with`](Self::transpile_with).
3867 pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
3868 self.transpile_with(sql, target, TranspileOptions::default())
3869 }
3870
3871 /// Transpile SQL with configurable [`TranspileOptions`] (e.g. pretty-printing).
3872 pub fn transpile_with<T: TranspileTarget>(
3873 &self,
3874 sql: &str,
3875 target: T,
3876 opts: TranspileOptions,
3877 ) -> Result<Vec<String>> {
3878 target.with_dialect(|td| self.transpile_inner(sql, td, opts.pretty))
3879 }
3880
3881 #[cfg(not(feature = "transpile"))]
3882 fn transpile_inner(
3883 &self,
3884 sql: &str,
3885 target_dialect: &Dialect,
3886 pretty: bool,
3887 ) -> Result<Vec<String>> {
3888 let target = target_dialect.dialect_type;
3889 // Without the transpile feature, only same-dialect or to/from generic is supported
3890 if self.dialect_type != target
3891 && self.dialect_type != DialectType::Generic
3892 && target != DialectType::Generic
3893 {
3894 return Err(crate::error::Error::parse(
3895 "Cross-dialect transpilation not available in this build",
3896 0,
3897 0,
3898 0,
3899 0,
3900 ));
3901 }
3902
3903 let expressions = self.parse(sql)?;
3904 let generic_identity =
3905 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
3906
3907 if generic_identity {
3908 return expressions
3909 .into_iter()
3910 .map(|expr| {
3911 if pretty {
3912 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
3913 } else {
3914 target_dialect.generate_with_source(&expr, self.dialect_type)
3915 }
3916 })
3917 .collect();
3918 }
3919
3920 expressions
3921 .into_iter()
3922 .map(|expr| {
3923 let transformed = target_dialect.transform(expr)?;
3924 if pretty {
3925 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
3926 } else {
3927 target_dialect.generate_with_source(&transformed, self.dialect_type)
3928 }
3929 })
3930 .collect()
3931 }
3932
3933 #[cfg(feature = "transpile")]
3934 fn transpile_inner(
3935 &self,
3936 sql: &str,
3937 target_dialect: &Dialect,
3938 pretty: bool,
3939 ) -> Result<Vec<String>> {
3940 let target = target_dialect.dialect_type;
3941 if matches!(self.dialect_type, DialectType::PostgreSQL)
3942 && matches!(target, DialectType::SQLite)
3943 {
3944 self.reject_pgvector_distance_operators_for_sqlite(sql)?;
3945 }
3946 let expressions = self.parse(sql)?;
3947 let generic_identity =
3948 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
3949
3950 if generic_identity {
3951 return expressions
3952 .into_iter()
3953 .map(|expr| {
3954 if pretty {
3955 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
3956 } else {
3957 target_dialect.generate_with_source(&expr, self.dialect_type)
3958 }
3959 })
3960 .collect();
3961 }
3962
3963 expressions
3964 .into_iter()
3965 .map(|expr| {
3966 // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
3967 // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
3968 // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
3969 let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
3970 use crate::expressions::DataType as DT;
3971 transform_recursive(expr, &|e| match e {
3972 Expression::DataType(DT::VarChar { .. }) => {
3973 Ok(Expression::DataType(DT::Text))
3974 }
3975 Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
3976 _ => Ok(e),
3977 })?
3978 } else {
3979 expr
3980 };
3981
3982 // When source and target differ, first normalize the source dialect's
3983 // AST constructs to standard SQL, so that the target dialect can handle them.
3984 // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
3985 let normalized =
3986 if self.dialect_type != target && self.dialect_type != DialectType::Generic {
3987 self.transform(expr)?
3988 } else {
3989 expr
3990 };
3991
3992 // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
3993 // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
3994 // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
3995 // cross-dialect transpilation we need the unwrapped JSON_QUERY.
3996 let normalized =
3997 if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
3998 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
3999 {
4000 transform_recursive(normalized, &|e| {
4001 if let Expression::Function(ref f) = e {
4002 if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
4003 // Check if first arg is JSON_QUERY and second is JSON_VALUE
4004 if let (
4005 Expression::Function(ref jq),
4006 Expression::Function(ref jv),
4007 ) = (&f.args[0], &f.args[1])
4008 {
4009 if jq.name.eq_ignore_ascii_case("JSON_QUERY")
4010 && jv.name.eq_ignore_ascii_case("JSON_VALUE")
4011 {
4012 // Unwrap: return just JSON_QUERY(...)
4013 return Ok(f.args[0].clone());
4014 }
4015 }
4016 }
4017 }
4018 Ok(e)
4019 })?
4020 } else {
4021 normalized
4022 };
4023
4024 // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
4025 // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
4026 // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
4027 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4028 && !matches!(target, DialectType::Snowflake)
4029 {
4030 transform_recursive(normalized, &|e| {
4031 if let Expression::Function(ref f) = e {
4032 if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
4033 return Ok(Expression::Localtime(Box::new(
4034 crate::expressions::Localtime { this: None },
4035 )));
4036 }
4037 }
4038 Ok(e)
4039 })?
4040 } else {
4041 normalized
4042 };
4043
4044 // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
4045 // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
4046 // transform. DuckDB requires the count argument to be BIGINT.
4047 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4048 && matches!(target, DialectType::DuckDB)
4049 {
4050 transform_recursive(normalized, &|e| {
4051 if let Expression::Function(ref f) = e {
4052 if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
4053 // Check if first arg is space string literal
4054 if let Expression::Literal(ref lit) = f.args[0] {
4055 if let crate::expressions::Literal::String(ref s) = lit.as_ref()
4056 {
4057 if s == " " {
4058 // Wrap second arg in CAST(... AS BIGINT) if not already
4059 if !matches!(f.args[1], Expression::Cast(_)) {
4060 let mut new_args = f.args.clone();
4061 new_args[1] = Expression::Cast(Box::new(
4062 crate::expressions::Cast {
4063 this: new_args[1].clone(),
4064 to: crate::expressions::DataType::BigInt {
4065 length: None,
4066 },
4067 trailing_comments: Vec::new(),
4068 double_colon_syntax: false,
4069 format: None,
4070 default: None,
4071 inferred_type: None,
4072 },
4073 ));
4074 return Ok(Expression::Function(Box::new(
4075 crate::expressions::Function {
4076 name: f.name.clone(),
4077 args: new_args,
4078 distinct: f.distinct,
4079 trailing_comments: f
4080 .trailing_comments
4081 .clone(),
4082 use_bracket_syntax: f.use_bracket_syntax,
4083 no_parens: f.no_parens,
4084 quoted: f.quoted,
4085 span: None,
4086 inferred_type: None,
4087 },
4088 )));
4089 }
4090 }
4091 }
4092 }
4093 }
4094 }
4095 Ok(e)
4096 })?
4097 } else {
4098 normalized
4099 };
4100
4101 // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
4102 // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
4103 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4104 && !matches!(target, DialectType::BigQuery)
4105 {
4106 crate::transforms::propagate_struct_field_names(normalized)?
4107 } else {
4108 normalized
4109 };
4110
4111 // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
4112 // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
4113 // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
4114 // functions handle their generator args differently (as float seeds).
4115 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4116 && matches!(target, DialectType::DuckDB)
4117 {
4118 fn make_scaled_random() -> Expression {
4119 let lower =
4120 Expression::Literal(Box::new(crate::expressions::Literal::Number(
4121 "-9.223372036854776E+18".to_string(),
4122 )));
4123 let upper =
4124 Expression::Literal(Box::new(crate::expressions::Literal::Number(
4125 "9.223372036854776e+18".to_string(),
4126 )));
4127 let random_call = Expression::Random(crate::expressions::Random);
4128 let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
4129 this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
4130 left: upper,
4131 right: lower.clone(),
4132 left_comments: vec![],
4133 operator_comments: vec![],
4134 trailing_comments: vec![],
4135 inferred_type: None,
4136 })),
4137 trailing_comments: vec![],
4138 }));
4139 let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
4140 left: random_call,
4141 right: range_size,
4142 left_comments: vec![],
4143 operator_comments: vec![],
4144 trailing_comments: vec![],
4145 inferred_type: None,
4146 }));
4147 let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
4148 left: lower,
4149 right: scaled,
4150 left_comments: vec![],
4151 operator_comments: vec![],
4152 trailing_comments: vec![],
4153 inferred_type: None,
4154 }));
4155 Expression::Cast(Box::new(crate::expressions::Cast {
4156 this: shifted,
4157 to: crate::expressions::DataType::BigInt { length: None },
4158 trailing_comments: vec![],
4159 double_colon_syntax: false,
4160 format: None,
4161 default: None,
4162 inferred_type: None,
4163 }))
4164 }
4165
4166 // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
4167 // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
4168 // This prevents transform_recursive (which is bottom-up) from expanding
4169 // seeded RANDOM into make_scaled_random() and losing the seed value.
4170 // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
4171 // and then un-expanded back to Expression::Random by the code below.
4172 let normalized = transform_recursive(normalized, &|e| {
4173 if let Expression::Function(ref f) = e {
4174 let n = f.name.to_ascii_uppercase();
4175 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
4176 if let Expression::Function(mut f) = e {
4177 for arg in f.args.iter_mut() {
4178 if let Expression::Rand(ref r) = arg {
4179 if r.lower.is_none() && r.upper.is_none() {
4180 if let Some(ref seed) = r.seed {
4181 // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
4182 // so it won't be expanded by the RANDOM expansion below
4183 *arg = Expression::Function(Box::new(
4184 crate::expressions::Function::new(
4185 "RANDOM".to_string(),
4186 vec![*seed.clone()],
4187 ),
4188 ));
4189 }
4190 }
4191 }
4192 }
4193 return Ok(Expression::Function(f));
4194 }
4195 }
4196 }
4197 Ok(e)
4198 })?;
4199
4200 // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
4201 // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
4202 // we see the parent. We detect this and undo the expansion by replacing
4203 // the expanded pattern back with Expression::Random.
4204 // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
4205 // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
4206 // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
4207 transform_recursive(normalized, &|e| {
4208 if let Expression::Function(ref f) = e {
4209 let n = f.name.to_ascii_uppercase();
4210 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
4211 if let Expression::Function(mut f) = e {
4212 for arg in f.args.iter_mut() {
4213 // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
4214 if let Expression::Cast(ref cast) = arg {
4215 if matches!(
4216 cast.to,
4217 crate::expressions::DataType::BigInt { .. }
4218 ) {
4219 if let Expression::Add(ref add) = cast.this {
4220 if let Expression::Literal(ref lit) = add.left {
4221 if let crate::expressions::Literal::Number(
4222 ref num,
4223 ) = lit.as_ref()
4224 {
4225 if num == "-9.223372036854776E+18" {
4226 *arg = Expression::Random(
4227 crate::expressions::Random,
4228 );
4229 }
4230 }
4231 }
4232 }
4233 }
4234 }
4235 }
4236 return Ok(Expression::Function(f));
4237 }
4238 return Ok(e);
4239 }
4240 }
4241 match e {
4242 Expression::Random(_) => Ok(make_scaled_random()),
4243 // Rand(seed) with no bounds: drop seed and expand
4244 // (DuckDB RANDOM doesn't support seeds)
4245 Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
4246 Ok(make_scaled_random())
4247 }
4248 _ => Ok(e),
4249 }
4250 })?
4251 } else {
4252 normalized
4253 };
4254
4255 // Apply cross-dialect semantic normalizations
4256 let normalized =
4257 Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;
4258
4259 let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL)
4260 && matches!(target, DialectType::SQLite)
4261 {
4262 Self::normalize_postgres_to_sqlite_types(normalized)?
4263 } else {
4264 normalized
4265 };
4266
4267 // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
4268 // (SELECT UNNEST(..., max_depth => 2)) subquery
4269 // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
4270 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4271 && matches!(target, DialectType::DuckDB)
4272 {
4273 crate::transforms::wrap_duckdb_unnest_struct(normalized)?
4274 } else {
4275 normalized
4276 };
4277
4278 // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
4279 // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
4280 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4281 && matches!(
4282 target,
4283 DialectType::DuckDB
4284 | DialectType::Presto
4285 | DialectType::Trino
4286 | DialectType::Athena
4287 | DialectType::Spark
4288 | DialectType::Databricks
4289 ) {
4290 crate::transforms::unnest_alias_to_column_alias(normalized)?
4291 } else if matches!(self.dialect_type, DialectType::BigQuery)
4292 && matches!(target, DialectType::BigQuery | DialectType::Redshift)
4293 {
4294 // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
4295 // but don't convert alias format (no _t0 wrapper)
4296 let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
4297 // For Redshift: strip UNNEST when arg is a column reference path
4298 if matches!(target, DialectType::Redshift) {
4299 crate::transforms::strip_unnest_column_refs(result)?
4300 } else {
4301 result
4302 }
4303 } else {
4304 normalized
4305 };
4306
4307 // For Presto/Trino targets from PostgreSQL/Redshift source:
4308 // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
4309 let normalized = if matches!(
4310 self.dialect_type,
4311 DialectType::PostgreSQL | DialectType::Redshift
4312 ) && matches!(
4313 target,
4314 DialectType::Presto | DialectType::Trino | DialectType::Athena
4315 ) {
4316 crate::transforms::wrap_unnest_join_aliases(normalized)?
4317 } else {
4318 normalized
4319 };
4320
4321 // Eliminate DISTINCT ON with target-dialect awareness
4322 // This must happen after source transform (which may produce DISTINCT ON)
4323 // and before target transform, with knowledge of the target dialect's NULL ordering behavior
4324 let normalized = crate::transforms::eliminate_distinct_on_for_dialect(
4325 normalized,
4326 Some(target),
4327 Some(self.dialect_type),
4328 )?;
4329
4330 // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
4331 let normalized = if matches!(target, DialectType::Snowflake) {
4332 Self::transform_generate_date_array_snowflake(normalized)?
4333 } else {
4334 normalized
4335 };
4336
4337 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
4338 let normalized = if matches!(
4339 target,
4340 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4341 ) {
4342 crate::transforms::unnest_to_explode_select(normalized)?
4343 } else {
4344 normalized
4345 };
4346
4347 // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
4348 let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
4349 crate::transforms::no_limit_order_by_union(normalized)?
4350 } else {
4351 normalized
4352 };
4353
4354 // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
4355 // Python sqlglot does this in the TSQL generator, but we can't do it there
4356 // because it would break TSQL -> TSQL identity
4357 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
4358 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
4359 {
4360 transform_recursive(normalized, &|e| {
4361 if let Expression::Count(ref c) = e {
4362 // Build COUNT_BIG(...) as an AggregateFunction
4363 let args = if c.star {
4364 vec![Expression::Star(crate::expressions::Star {
4365 table: None,
4366 except: None,
4367 replace: None,
4368 rename: None,
4369 trailing_comments: Vec::new(),
4370 span: None,
4371 })]
4372 } else if let Some(ref this) = c.this {
4373 vec![this.clone()]
4374 } else {
4375 vec![]
4376 };
4377 Ok(Expression::AggregateFunction(Box::new(
4378 crate::expressions::AggregateFunction {
4379 name: "COUNT_BIG".to_string(),
4380 args,
4381 distinct: c.distinct,
4382 filter: c.filter.clone(),
4383 order_by: Vec::new(),
4384 limit: None,
4385 ignore_nulls: None,
4386 inferred_type: None,
4387 },
4388 )))
4389 } else {
4390 Ok(e)
4391 }
4392 })?
4393 } else {
4394 normalized
4395 };
4396
4397 let transformed = target_dialect.transform(normalized)?;
4398
4399 // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
4400 let transformed = if matches!(target, DialectType::DuckDB) {
4401 Self::seq_rownum_to_range(transformed)?
4402 } else {
4403 transformed
4404 };
4405
4406 let mut sql = if pretty {
4407 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
4408 } else {
4409 target_dialect.generate_with_source(&transformed, self.dialect_type)?
4410 };
4411
4412 // Align a known Snowflake pretty-print edge case with Python sqlglot output.
4413 if pretty && target == DialectType::Snowflake {
4414 sql = Self::normalize_snowflake_pretty(sql);
4415 }
4416
4417 Ok(sql)
4418 })
4419 .collect()
4420 }
4421}
4422
4423// Transpile-only methods: cross-dialect normalization and helpers
4424#[cfg(feature = "transpile")]
4425impl Dialect {
4426 fn reject_pgvector_distance_operators_for_sqlite(&self, sql: &str) -> Result<()> {
4427 let tokens = self.tokenize(sql)?;
4428 for (i, token) in tokens.iter().enumerate() {
4429 if token.token_type == TokenType::NullsafeEq {
4430 return Err(crate::error::Error::unsupported(
4431 "PostgreSQL pgvector cosine distance operator <=>",
4432 "SQLite",
4433 ));
4434 }
4435 if token.token_type == TokenType::Lt
4436 && tokens
4437 .get(i + 1)
4438 .is_some_and(|token| token.token_type == TokenType::Tilde)
4439 && tokens
4440 .get(i + 2)
4441 .is_some_and(|token| token.token_type == TokenType::Gt)
4442 {
4443 return Err(crate::error::Error::unsupported(
4444 "PostgreSQL pgvector Hamming distance operator <~>",
4445 "SQLite",
4446 ));
4447 }
4448 }
4449 Ok(())
4450 }
4451
4452 fn normalize_postgres_to_sqlite_types(expr: Expression) -> Result<Expression> {
4453 fn sqlite_type(dt: crate::expressions::DataType) -> crate::expressions::DataType {
4454 use crate::expressions::DataType;
4455
4456 match dt {
4457 DataType::Bit { .. } => DataType::Int {
4458 length: None,
4459 integer_spelling: true,
4460 },
4461 DataType::TextWithLength { .. } => DataType::Text,
4462 DataType::VarChar { .. } => DataType::Text,
4463 DataType::Char { .. } => DataType::Text,
4464 DataType::Timestamp { timezone: true, .. } => DataType::Text,
4465 DataType::Custom { name } => {
4466 let base = name
4467 .split_once('(')
4468 .map_or(name.as_str(), |(base, _)| base)
4469 .trim();
4470 if base.eq_ignore_ascii_case("TSVECTOR")
4471 || base.eq_ignore_ascii_case("TIMESTAMPTZ")
4472 || base.eq_ignore_ascii_case("TIMESTAMP WITH TIME ZONE")
4473 || base.eq_ignore_ascii_case("NVARCHAR")
4474 || base.eq_ignore_ascii_case("NCHAR")
4475 {
4476 DataType::Text
4477 } else {
4478 DataType::Custom { name }
4479 }
4480 }
4481 _ => dt,
4482 }
4483 }
4484
4485 transform_recursive(expr, &|e| match e {
4486 Expression::DataType(dt) => Ok(Expression::DataType(sqlite_type(dt))),
4487 Expression::CreateTable(mut ct) => {
4488 for column in &mut ct.columns {
4489 column.data_type = sqlite_type(column.data_type.clone());
4490 }
4491 Ok(Expression::CreateTable(ct))
4492 }
4493 _ => Ok(e),
4494 })
4495 }
4496
4497 /// For DuckDB target: when FROM clause contains RANGE(n), replace
4498 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
4499 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
4500 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
4501 if let Expression::Select(mut select) = expr {
4502 // Check if FROM contains a RANGE function
4503 let has_range_from = if let Some(ref from) = select.from {
4504 from.expressions.iter().any(|e| {
4505 // Check for direct RANGE(...) or aliased RANGE(...)
4506 match e {
4507 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
4508 Expression::Alias(a) => {
4509 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
4510 }
4511 _ => false,
4512 }
4513 })
4514 } else {
4515 false
4516 };
4517
4518 if has_range_from {
4519 // Replace the ROW_NUMBER pattern in select expressions
4520 select.expressions = select
4521 .expressions
4522 .into_iter()
4523 .map(|e| Self::replace_rownum_with_range(e))
4524 .collect();
4525 }
4526
4527 Ok(Expression::Select(select))
4528 } else {
4529 Ok(expr)
4530 }
4531 }
4532
4533 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
4534 fn replace_rownum_with_range(expr: Expression) -> Expression {
4535 match expr {
4536 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
4537 Expression::Mod(op) => {
4538 let new_left = Self::try_replace_rownum_paren(&op.left);
4539 Expression::Mod(Box::new(crate::expressions::BinaryOp {
4540 left: new_left,
4541 right: op.right,
4542 left_comments: op.left_comments,
4543 operator_comments: op.operator_comments,
4544 trailing_comments: op.trailing_comments,
4545 inferred_type: op.inferred_type,
4546 }))
4547 }
4548 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
4549 Expression::Paren(p) => {
4550 let inner = Self::replace_rownum_with_range(p.this);
4551 Expression::Paren(Box::new(crate::expressions::Paren {
4552 this: inner,
4553 trailing_comments: p.trailing_comments,
4554 }))
4555 }
4556 Expression::Case(mut c) => {
4557 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
4558 c.whens = c
4559 .whens
4560 .into_iter()
4561 .map(|(cond, then)| {
4562 (
4563 Self::replace_rownum_with_range(cond),
4564 Self::replace_rownum_with_range(then),
4565 )
4566 })
4567 .collect();
4568 if let Some(else_) = c.else_ {
4569 c.else_ = Some(Self::replace_rownum_with_range(else_));
4570 }
4571 Expression::Case(c)
4572 }
4573 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
4574 left: Self::replace_rownum_with_range(op.left),
4575 right: op.right,
4576 left_comments: op.left_comments,
4577 operator_comments: op.operator_comments,
4578 trailing_comments: op.trailing_comments,
4579 inferred_type: op.inferred_type,
4580 })),
4581 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
4582 left: Self::replace_rownum_with_range(op.left),
4583 right: op.right,
4584 left_comments: op.left_comments,
4585 operator_comments: op.operator_comments,
4586 trailing_comments: op.trailing_comments,
4587 inferred_type: op.inferred_type,
4588 })),
4589 Expression::Alias(mut a) => {
4590 a.this = Self::replace_rownum_with_range(a.this);
4591 Expression::Alias(a)
4592 }
4593 other => other,
4594 }
4595 }
4596
4597 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
4598 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
4599 if let Expression::Paren(ref p) = expr {
4600 if let Expression::Sub(ref sub) = p.this {
4601 if let Expression::WindowFunction(ref wf) = sub.left {
4602 if let Expression::Function(ref f) = wf.this {
4603 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
4604 if let Expression::Literal(ref lit) = sub.right {
4605 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
4606 if n == "1" {
4607 return Expression::column("range");
4608 }
4609 }
4610 }
4611 }
4612 }
4613 }
4614 }
4615 }
4616 expr.clone()
4617 }
4618
4619 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
4620 /// Converts:
4621 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
4622 /// To:
4623 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
4624 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
4625 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
4626 use crate::expressions::*;
4627 transform_recursive(expr, &|e| {
4628 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
4629 if let Expression::ArraySize(ref af) = e {
4630 if let Expression::Function(ref f) = af.this {
4631 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4632 let result = Self::convert_array_size_gda_snowflake(f)?;
4633 return Ok(result);
4634 }
4635 }
4636 }
4637
4638 let Expression::Select(mut sel) = e else {
4639 return Ok(e);
4640 };
4641
4642 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
4643 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
4644 let mut gda_join_idx: Option<usize> = None;
4645
4646 for (idx, join) in sel.joins.iter().enumerate() {
4647 // The join.this may be:
4648 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
4649 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
4650 let (unnest_ref, alias_name) = match &join.this {
4651 Expression::Unnest(ref unnest) => {
4652 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
4653 (Some(unnest.as_ref()), alias)
4654 }
4655 Expression::Alias(ref a) => {
4656 if let Expression::Unnest(ref unnest) = a.this {
4657 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
4658 } else {
4659 (None, None)
4660 }
4661 }
4662 _ => (None, None),
4663 };
4664
4665 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
4666 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
4667 if let Expression::Function(ref f) = unnest.this {
4668 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4669 let start_expr = f.args[0].clone();
4670 let end_expr = f.args[1].clone();
4671 let step = f.args.get(2).cloned();
4672
4673 // Extract unit from step interval
4674 let unit = if let Some(Expression::Interval(ref iv)) = step {
4675 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
4676 Some(format!("{:?}", unit).to_ascii_uppercase())
4677 } else if let Some(ref this) = iv.this {
4678 // The interval may be stored as a string like "1 MONTH"
4679 if let Expression::Literal(lit) = this {
4680 if let Literal::String(ref s) = lit.as_ref() {
4681 let parts: Vec<&str> = s.split_whitespace().collect();
4682 if parts.len() == 2 {
4683 Some(parts[1].to_ascii_uppercase())
4684 } else if parts.len() == 1 {
4685 // Single word like "MONTH" or just "1"
4686 let upper = parts[0].to_ascii_uppercase();
4687 if matches!(
4688 upper.as_str(),
4689 "YEAR"
4690 | "QUARTER"
4691 | "MONTH"
4692 | "WEEK"
4693 | "DAY"
4694 | "HOUR"
4695 | "MINUTE"
4696 | "SECOND"
4697 ) {
4698 Some(upper)
4699 } else {
4700 None
4701 }
4702 } else {
4703 None
4704 }
4705 } else {
4706 None
4707 }
4708 } else {
4709 None
4710 }
4711 } else {
4712 None
4713 }
4714 } else {
4715 None
4716 };
4717
4718 if let Some(unit_str) = unit {
4719 gda_info = Some((alias, start_expr, end_expr, unit_str));
4720 gda_join_idx = Some(idx);
4721 }
4722 }
4723 }
4724 }
4725 if gda_info.is_some() {
4726 break;
4727 }
4728 }
4729
4730 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
4731 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
4732 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
4733 let result = Self::try_transform_from_gda_snowflake(sel);
4734 return result;
4735 };
4736 let join_idx = gda_join_idx.unwrap();
4737
4738 // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
4739 // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
4740 // (inclusive date range), so the exclusive end is DATEDIFF + 1.
4741 let datediff = Expression::Function(Box::new(Function::new(
4742 "DATEDIFF".to_string(),
4743 vec![
4744 Expression::boxed_column(Column {
4745 name: Identifier::new(&unit_str),
4746 table: None,
4747 join_mark: false,
4748 trailing_comments: vec![],
4749 span: None,
4750 inferred_type: None,
4751 }),
4752 start_expr.clone(),
4753 end_expr.clone(),
4754 ],
4755 )));
4756 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
4757 left: datediff,
4758 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4759 left_comments: vec![],
4760 operator_comments: vec![],
4761 trailing_comments: vec![],
4762 inferred_type: None,
4763 }));
4764
4765 let array_gen_range = Expression::Function(Box::new(Function::new(
4766 "ARRAY_GENERATE_RANGE".to_string(),
4767 vec![
4768 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4769 datediff_plus_one,
4770 ],
4771 )));
4772
4773 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
4774 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
4775 name: Identifier::new("INPUT"),
4776 value: array_gen_range,
4777 separator: crate::expressions::NamedArgSeparator::DArrow,
4778 }));
4779 let flatten = Expression::Function(Box::new(Function::new(
4780 "FLATTEN".to_string(),
4781 vec![flatten_input],
4782 )));
4783
4784 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
4785 let alias_table = Alias {
4786 this: flatten,
4787 alias: Identifier::new("_t0"),
4788 column_aliases: vec![
4789 Identifier::new("seq"),
4790 Identifier::new("key"),
4791 Identifier::new("path"),
4792 Identifier::new("index"),
4793 Identifier::new(&alias_name),
4794 Identifier::new("this"),
4795 ],
4796 alias_explicit_as: false,
4797 alias_keyword: None,
4798 pre_alias_comments: vec![],
4799 trailing_comments: vec![],
4800 inferred_type: None,
4801 };
4802 let lateral_expr = Expression::Lateral(Box::new(Lateral {
4803 this: Box::new(Expression::Alias(Box::new(alias_table))),
4804 view: None,
4805 outer: None,
4806 alias: None,
4807 alias_quoted: false,
4808 cross_apply: None,
4809 ordinality: None,
4810 column_aliases: vec![],
4811 }));
4812
4813 // Remove the original join and add to FROM expressions
4814 sel.joins.remove(join_idx);
4815 if let Some(ref mut from) = sel.from {
4816 from.expressions.push(lateral_expr);
4817 }
4818
4819 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
4820 let dateadd_expr = Expression::Function(Box::new(Function::new(
4821 "DATEADD".to_string(),
4822 vec![
4823 Expression::boxed_column(Column {
4824 name: Identifier::new(&unit_str),
4825 table: None,
4826 join_mark: false,
4827 trailing_comments: vec![],
4828 span: None,
4829 inferred_type: None,
4830 }),
4831 Expression::Cast(Box::new(Cast {
4832 this: Expression::boxed_column(Column {
4833 name: Identifier::new(&alias_name),
4834 table: None,
4835 join_mark: false,
4836 trailing_comments: vec![],
4837 span: None,
4838 inferred_type: None,
4839 }),
4840 to: DataType::Int {
4841 length: None,
4842 integer_spelling: false,
4843 },
4844 trailing_comments: vec![],
4845 double_colon_syntax: false,
4846 format: None,
4847 default: None,
4848 inferred_type: None,
4849 })),
4850 Expression::Cast(Box::new(Cast {
4851 this: start_expr.clone(),
4852 to: DataType::Date,
4853 trailing_comments: vec![],
4854 double_colon_syntax: false,
4855 format: None,
4856 default: None,
4857 inferred_type: None,
4858 })),
4859 ],
4860 )));
4861
4862 // Replace references to the alias in the SELECT list
4863 let new_exprs: Vec<Expression> = sel
4864 .expressions
4865 .iter()
4866 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
4867 .collect();
4868 sel.expressions = new_exprs;
4869
4870 Ok(Expression::Select(sel))
4871 })
4872 }
4873
4874 /// Helper: replace column references to `alias_name` with dateadd expression
4875 fn replace_column_ref_with_dateadd(
4876 expr: &Expression,
4877 alias_name: &str,
4878 dateadd: &Expression,
4879 ) -> Expression {
4880 use crate::expressions::*;
4881 match expr {
4882 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4883 // Plain column reference -> DATEADD(...) AS alias_name
4884 Expression::Alias(Box::new(Alias {
4885 this: dateadd.clone(),
4886 alias: Identifier::new(alias_name),
4887 column_aliases: vec![],
4888 alias_explicit_as: false,
4889 alias_keyword: None,
4890 pre_alias_comments: vec![],
4891 trailing_comments: vec![],
4892 inferred_type: None,
4893 }))
4894 }
4895 Expression::Alias(a) => {
4896 // Check if the inner expression references the alias
4897 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
4898 Expression::Alias(Box::new(Alias {
4899 this: new_this,
4900 alias: a.alias.clone(),
4901 column_aliases: a.column_aliases.clone(),
4902 alias_explicit_as: false,
4903 alias_keyword: None,
4904 pre_alias_comments: a.pre_alias_comments.clone(),
4905 trailing_comments: a.trailing_comments.clone(),
4906 inferred_type: None,
4907 }))
4908 }
4909 _ => expr.clone(),
4910 }
4911 }
4912
4913 /// Helper: replace column references in inner expression (not top-level)
4914 fn replace_column_ref_inner(
4915 expr: &Expression,
4916 alias_name: &str,
4917 dateadd: &Expression,
4918 ) -> Expression {
4919 use crate::expressions::*;
4920 match expr {
4921 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4922 dateadd.clone()
4923 }
4924 Expression::Add(op) => {
4925 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4926 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4927 Expression::Add(Box::new(BinaryOp {
4928 left,
4929 right,
4930 left_comments: op.left_comments.clone(),
4931 operator_comments: op.operator_comments.clone(),
4932 trailing_comments: op.trailing_comments.clone(),
4933 inferred_type: None,
4934 }))
4935 }
4936 Expression::Sub(op) => {
4937 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4938 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4939 Expression::Sub(Box::new(BinaryOp {
4940 left,
4941 right,
4942 left_comments: op.left_comments.clone(),
4943 operator_comments: op.operator_comments.clone(),
4944 trailing_comments: op.trailing_comments.clone(),
4945 inferred_type: None,
4946 }))
4947 }
4948 Expression::Mul(op) => {
4949 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4950 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4951 Expression::Mul(Box::new(BinaryOp {
4952 left,
4953 right,
4954 left_comments: op.left_comments.clone(),
4955 operator_comments: op.operator_comments.clone(),
4956 trailing_comments: op.trailing_comments.clone(),
4957 inferred_type: None,
4958 }))
4959 }
4960 _ => expr.clone(),
4961 }
4962 }
4963
4964 /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
4965 /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
4966 fn try_transform_from_gda_snowflake(
4967 mut sel: Box<crate::expressions::Select>,
4968 ) -> Result<Expression> {
4969 use crate::expressions::*;
4970
4971 // Extract GDA info from FROM clause
4972 let mut gda_info: Option<(
4973 usize,
4974 String,
4975 Expression,
4976 Expression,
4977 String,
4978 Option<(String, Vec<Identifier>)>,
4979 )> = None; // (from_idx, col_name, start, end, unit, outer_alias)
4980
4981 if let Some(ref from) = sel.from {
4982 for (idx, table_expr) in from.expressions.iter().enumerate() {
4983 // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
4984 // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
4985 let (unnest_opt, outer_alias_info) = match table_expr {
4986 Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
4987 Expression::Alias(ref a) => {
4988 if let Expression::Unnest(ref unnest) = a.this {
4989 let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
4990 (Some(unnest.as_ref()), Some(alias_info))
4991 } else {
4992 (None, None)
4993 }
4994 }
4995 _ => (None, None),
4996 };
4997
4998 if let Some(unnest) = unnest_opt {
4999 // Check for GENERATE_DATE_ARRAY function
5000 let func_opt = match &unnest.this {
5001 Expression::Function(ref f)
5002 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
5003 && f.args.len() >= 2 =>
5004 {
5005 Some(f)
5006 }
5007 // Also check for GenerateSeries (from earlier normalization)
5008 _ => None,
5009 };
5010
5011 if let Some(f) = func_opt {
5012 let start_expr = f.args[0].clone();
5013 let end_expr = f.args[1].clone();
5014 let step = f.args.get(2).cloned();
5015
5016 // Extract unit and column name
5017 let unit = Self::extract_interval_unit_str(&step);
5018 let col_name = outer_alias_info
5019 .as_ref()
5020 .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
5021 .unwrap_or_else(|| "value".to_string());
5022
5023 if let Some(unit_str) = unit {
5024 gda_info = Some((
5025 idx,
5026 col_name,
5027 start_expr,
5028 end_expr,
5029 unit_str,
5030 outer_alias_info,
5031 ));
5032 break;
5033 }
5034 }
5035 }
5036 }
5037 }
5038
5039 let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
5040 else {
5041 return Ok(Expression::Select(sel));
5042 };
5043
5044 // Build the Snowflake subquery:
5045 // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
5046 // FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))
5047
5048 // DATEDIFF(unit, start, end)
5049 let datediff = Expression::Function(Box::new(Function::new(
5050 "DATEDIFF".to_string(),
5051 vec![
5052 Expression::boxed_column(Column {
5053 name: Identifier::new(&unit_str),
5054 table: None,
5055 join_mark: false,
5056 trailing_comments: vec![],
5057 span: None,
5058 inferred_type: None,
5059 }),
5060 start_expr.clone(),
5061 end_expr.clone(),
5062 ],
5063 )));
5064 // DATEDIFF(...) + 1
5065 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
5066 left: datediff,
5067 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5068 left_comments: vec![],
5069 operator_comments: vec![],
5070 trailing_comments: vec![],
5071 inferred_type: None,
5072 }));
5073
5074 let array_gen_range = Expression::Function(Box::new(Function::new(
5075 "ARRAY_GENERATE_RANGE".to_string(),
5076 vec![
5077 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
5078 datediff_plus_one,
5079 ],
5080 )));
5081
5082 // TABLE(FLATTEN(INPUT => ...))
5083 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
5084 name: Identifier::new("INPUT"),
5085 value: array_gen_range,
5086 separator: crate::expressions::NamedArgSeparator::DArrow,
5087 }));
5088 let flatten = Expression::Function(Box::new(Function::new(
5089 "FLATTEN".to_string(),
5090 vec![flatten_input],
5091 )));
5092
5093 // Determine alias name for the table: use outer alias or _t0
5094 let table_alias_name = outer_alias_info
5095 .as_ref()
5096 .map(|(name, _)| name.clone())
5097 .unwrap_or_else(|| "_t0".to_string());
5098
5099 // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
5100 let table_func =
5101 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
5102 let flatten_aliased = Expression::Alias(Box::new(Alias {
5103 this: table_func,
5104 alias: Identifier::new(&table_alias_name),
5105 column_aliases: vec![
5106 Identifier::new("seq"),
5107 Identifier::new("key"),
5108 Identifier::new("path"),
5109 Identifier::new("index"),
5110 Identifier::new(&col_name),
5111 Identifier::new("this"),
5112 ],
5113 alias_explicit_as: false,
5114 alias_keyword: None,
5115 pre_alias_comments: vec![],
5116 trailing_comments: vec![],
5117 inferred_type: None,
5118 }));
5119
5120 // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
5121 let dateadd_expr = Expression::Function(Box::new(Function::new(
5122 "DATEADD".to_string(),
5123 vec![
5124 Expression::boxed_column(Column {
5125 name: Identifier::new(&unit_str),
5126 table: None,
5127 join_mark: false,
5128 trailing_comments: vec![],
5129 span: None,
5130 inferred_type: None,
5131 }),
5132 Expression::Cast(Box::new(Cast {
5133 this: Expression::boxed_column(Column {
5134 name: Identifier::new(&col_name),
5135 table: None,
5136 join_mark: false,
5137 trailing_comments: vec![],
5138 span: None,
5139 inferred_type: None,
5140 }),
5141 to: DataType::Int {
5142 length: None,
5143 integer_spelling: false,
5144 },
5145 trailing_comments: vec![],
5146 double_colon_syntax: false,
5147 format: None,
5148 default: None,
5149 inferred_type: None,
5150 })),
5151 // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
5152 start_expr.clone(),
5153 ],
5154 )));
5155 let dateadd_aliased = Expression::Alias(Box::new(Alias {
5156 this: dateadd_expr,
5157 alias: Identifier::new(&col_name),
5158 column_aliases: vec![],
5159 alias_explicit_as: false,
5160 alias_keyword: None,
5161 pre_alias_comments: vec![],
5162 trailing_comments: vec![],
5163 inferred_type: None,
5164 }));
5165
5166 // Build inner SELECT
5167 let mut inner_select = Select::new();
5168 inner_select.expressions = vec![dateadd_aliased];
5169 inner_select.from = Some(From {
5170 expressions: vec![flatten_aliased],
5171 });
5172
5173 let inner_select_expr = Expression::Select(Box::new(inner_select));
5174 let subquery = Expression::Subquery(Box::new(Subquery {
5175 this: inner_select_expr,
5176 alias: None,
5177 column_aliases: vec![],
5178 alias_explicit_as: false,
5179 alias_keyword: None,
5180 order_by: None,
5181 limit: None,
5182 offset: None,
5183 distribute_by: None,
5184 sort_by: None,
5185 cluster_by: None,
5186 lateral: false,
5187 modifiers_inside: false,
5188 trailing_comments: vec![],
5189 inferred_type: None,
5190 }));
5191
5192 // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
5193 let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
5194 Expression::Alias(Box::new(Alias {
5195 this: subquery,
5196 alias: Identifier::new(&alias_name),
5197 column_aliases: col_aliases,
5198 alias_explicit_as: false,
5199 alias_keyword: None,
5200 pre_alias_comments: vec![],
5201 trailing_comments: vec![],
5202 inferred_type: None,
5203 }))
5204 } else {
5205 subquery
5206 };
5207
5208 // Replace the FROM expression
5209 if let Some(ref mut from) = sel.from {
5210 from.expressions[from_idx] = replacement;
5211 }
5212
5213 Ok(Expression::Select(sel))
5214 }
5215
5216 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
5217 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
5218 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
5219 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
5220 use crate::expressions::*;
5221
5222 let start_expr = f.args[0].clone();
5223 let end_expr = f.args[1].clone();
5224 let step = f.args.get(2).cloned();
5225 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
5226 let col_name = "value";
5227
5228 // Build the inner subquery: same as try_transform_from_gda_snowflake
5229 let datediff = Expression::Function(Box::new(Function::new(
5230 "DATEDIFF".to_string(),
5231 vec![
5232 Expression::boxed_column(Column {
5233 name: Identifier::new(&unit_str),
5234 table: None,
5235 join_mark: false,
5236 trailing_comments: vec![],
5237 span: None,
5238 inferred_type: None,
5239 }),
5240 start_expr.clone(),
5241 end_expr.clone(),
5242 ],
5243 )));
5244 // DATEDIFF(...) + 1
5245 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
5246 left: datediff,
5247 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5248 left_comments: vec![],
5249 operator_comments: vec![],
5250 trailing_comments: vec![],
5251 inferred_type: None,
5252 }));
5253
5254 let array_gen_range = Expression::Function(Box::new(Function::new(
5255 "ARRAY_GENERATE_RANGE".to_string(),
5256 vec![
5257 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
5258 datediff_plus_one,
5259 ],
5260 )));
5261
5262 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
5263 name: Identifier::new("INPUT"),
5264 value: array_gen_range,
5265 separator: crate::expressions::NamedArgSeparator::DArrow,
5266 }));
5267 let flatten = Expression::Function(Box::new(Function::new(
5268 "FLATTEN".to_string(),
5269 vec![flatten_input],
5270 )));
5271
5272 let table_func =
5273 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
5274 let flatten_aliased = Expression::Alias(Box::new(Alias {
5275 this: table_func,
5276 alias: Identifier::new("_t0"),
5277 column_aliases: vec![
5278 Identifier::new("seq"),
5279 Identifier::new("key"),
5280 Identifier::new("path"),
5281 Identifier::new("index"),
5282 Identifier::new(col_name),
5283 Identifier::new("this"),
5284 ],
5285 alias_explicit_as: false,
5286 alias_keyword: None,
5287 pre_alias_comments: vec![],
5288 trailing_comments: vec![],
5289 inferred_type: None,
5290 }));
5291
5292 let dateadd_expr = Expression::Function(Box::new(Function::new(
5293 "DATEADD".to_string(),
5294 vec![
5295 Expression::boxed_column(Column {
5296 name: Identifier::new(&unit_str),
5297 table: None,
5298 join_mark: false,
5299 trailing_comments: vec![],
5300 span: None,
5301 inferred_type: None,
5302 }),
5303 Expression::Cast(Box::new(Cast {
5304 this: Expression::boxed_column(Column {
5305 name: Identifier::new(col_name),
5306 table: None,
5307 join_mark: false,
5308 trailing_comments: vec![],
5309 span: None,
5310 inferred_type: None,
5311 }),
5312 to: DataType::Int {
5313 length: None,
5314 integer_spelling: false,
5315 },
5316 trailing_comments: vec![],
5317 double_colon_syntax: false,
5318 format: None,
5319 default: None,
5320 inferred_type: None,
5321 })),
5322 start_expr.clone(),
5323 ],
5324 )));
5325 let dateadd_aliased = Expression::Alias(Box::new(Alias {
5326 this: dateadd_expr,
5327 alias: Identifier::new(col_name),
5328 column_aliases: vec![],
5329 alias_explicit_as: false,
5330 alias_keyword: None,
5331 pre_alias_comments: vec![],
5332 trailing_comments: vec![],
5333 inferred_type: None,
5334 }));
5335
5336 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
5337 let mut inner_select = Select::new();
5338 inner_select.expressions = vec![dateadd_aliased];
5339 inner_select.from = Some(From {
5340 expressions: vec![flatten_aliased],
5341 });
5342
5343 // Wrap in subquery for the inner part
5344 let inner_subquery = Expression::Subquery(Box::new(Subquery {
5345 this: Expression::Select(Box::new(inner_select)),
5346 alias: None,
5347 column_aliases: vec![],
5348 alias_explicit_as: false,
5349 alias_keyword: None,
5350 order_by: None,
5351 limit: None,
5352 offset: None,
5353 distribute_by: None,
5354 sort_by: None,
5355 cluster_by: None,
5356 lateral: false,
5357 modifiers_inside: false,
5358 trailing_comments: vec![],
5359 inferred_type: None,
5360 }));
5361
5362 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
5363 let star = Expression::Star(Star {
5364 table: None,
5365 except: None,
5366 replace: None,
5367 rename: None,
5368 trailing_comments: vec![],
5369 span: None,
5370 });
5371 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
5372 this: star,
5373 distinct: false,
5374 filter: None,
5375 order_by: vec![],
5376 name: Some("ARRAY_AGG".to_string()),
5377 ignore_nulls: None,
5378 having_max: None,
5379 limit: None,
5380 inferred_type: None,
5381 }));
5382
5383 let mut outer_select = Select::new();
5384 outer_select.expressions = vec![array_agg];
5385 outer_select.from = Some(From {
5386 expressions: vec![inner_subquery],
5387 });
5388
5389 // Wrap in a subquery
5390 let outer_subquery = Expression::Subquery(Box::new(Subquery {
5391 this: Expression::Select(Box::new(outer_select)),
5392 alias: None,
5393 column_aliases: vec![],
5394 alias_explicit_as: false,
5395 alias_keyword: None,
5396 order_by: None,
5397 limit: None,
5398 offset: None,
5399 distribute_by: None,
5400 sort_by: None,
5401 cluster_by: None,
5402 lateral: false,
5403 modifiers_inside: false,
5404 trailing_comments: vec![],
5405 inferred_type: None,
5406 }));
5407
5408 // ARRAY_SIZE(subquery)
5409 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
5410 outer_subquery,
5411 ))))
5412 }
5413
5414 /// Extract interval unit string from an optional step expression.
5415 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
5416 use crate::expressions::*;
5417 if let Some(Expression::Interval(ref iv)) = step {
5418 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
5419 return Some(format!("{:?}", unit).to_ascii_uppercase());
5420 }
5421 if let Some(ref this) = iv.this {
5422 if let Expression::Literal(lit) = this {
5423 if let Literal::String(ref s) = lit.as_ref() {
5424 let parts: Vec<&str> = s.split_whitespace().collect();
5425 if parts.len() == 2 {
5426 return Some(parts[1].to_ascii_uppercase());
5427 } else if parts.len() == 1 {
5428 let upper = parts[0].to_ascii_uppercase();
5429 if matches!(
5430 upper.as_str(),
5431 "YEAR"
5432 | "QUARTER"
5433 | "MONTH"
5434 | "WEEK"
5435 | "DAY"
5436 | "HOUR"
5437 | "MINUTE"
5438 | "SECOND"
5439 ) {
5440 return Some(upper);
5441 }
5442 }
5443 }
5444 }
5445 }
5446 }
5447 // Default to DAY if no step or no interval
5448 if step.is_none() {
5449 return Some("DAY".to_string());
5450 }
5451 None
5452 }
5453
/// Post-processes pretty-printed Snowflake SQL for one specific query shape.
///
/// The literal replacements below are applied only when the text contains both marker
/// substrings; each replacement re-flows a fixed fragment across more lines. Any other
/// input is returned unchanged.
fn normalize_snowflake_pretty(mut sql: String) -> String {
    // Both of these must be present before anything is rewritten.
    const REQUIRED_MARKERS: [&str; 2] = [
        "LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)",
        "ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)",
    ];

    // (compact fragment, re-flowed fragment), applied in this order.
    const REWRITES: [(&str, &str); 3] = [
        (
            "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
            "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
        ),
        (
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
        ),
        (
            "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
            "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
        ),
    ];

    let applies = REQUIRED_MARKERS.iter().all(|&marker| sql.contains(marker));
    if applies {
        for &(compact, reflowed) in REWRITES.iter() {
            sql = sql.replace(compact, reflowed);
        }
    }

    sql
}
5476
5477 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
5478 /// This handles cases where the same syntax has different semantics across dialects.
5479 fn cross_dialect_normalize(
5480 expr: Expression,
5481 source: DialectType,
5482 target: DialectType,
5483 ) -> Result<Expression> {
5484 use crate::expressions::{
5485 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
5486 Function, Identifier, IsNull, Literal, Null, Paren,
5487 };
5488
/// Tag identifying which kind of transform to apply to an expression.
///
/// Where a variant has a note, it sketches the rewrite as `before -> after`
/// and names the dialect(s) it concerns.
#[derive(Debug)]
enum Action {
    /// No transform tagged (presumably the "nothing to do" case — confirm at
    /// the site that consumes this enum).
    None,

    // The variants in this first group carry no description here; their
    // rewrites are defined wherever the tag is consumed.
    GreatestLeastNull,
    ArrayGenerateRange,
    Div0TypedDivision,
    ArrayAggCollectList,
    ArrayAggWithinGroupFilter,
    ArrayAggFilter,
    CastTimestampToDatetime,
    DateTruncWrapCast,
    ToDateToCast,
    ConvertTimezoneToExpr,
    SetToVariable,
    RegexpReplaceSnowflakeToDuckDB,
    BigQueryFunctionNormalize,
    BigQuerySafeDivide,
    BigQueryCastType,

    /// `_BQ_TO_HEX(x)` with no `LOWER`/`UPPER` wrapper.
    BigQueryToHexBare,
    /// `LOWER(_BQ_TO_HEX(x))`.
    BigQueryToHexLower,
    /// `UPPER(_BQ_TO_HEX(x))`.
    BigQueryToHexUpper,
    /// `LAST_DAY(date, MONTH)` -> `LAST_DAY(date)`.
    BigQueryLastDayStripUnit,
    /// `CAST(x AS type FORMAT 'fmt')` -> `PARSE_DATE`/`PARSE_TIMESTAMP` etc.
    BigQueryCastFormat,
    /// `ANY_VALUE(x HAVING MAX/MIN y)` -> `ARG_MAX_NULL`/`ARG_MIN_NULL` for DuckDB.
    BigQueryAnyValueHaving,
    /// `APPROX_QUANTILES(x, n)` -> `APPROX_QUANTILE(x, [quantiles])` for DuckDB.
    BigQueryApproxQuantiles,
    /// Cross-dialect function renaming (non-BigQuery sources).
    GenericFunctionNormalize,
    /// `RegexpLike` -> `REGEXP_MATCHES` for a DuckDB target.
    RegexpLikeToDuckDB,
    /// `Expression::Epoch` -> target-specific epoch function.
    EpochConvert,
    /// `Expression::EpochMs` -> target-specific epoch-milliseconds function.
    EpochMsConvert,
    /// TSQL types (`MONEY`, `SMALLMONEY`, `REAL`, `DATETIME2`) -> standard types.
    TSQLTypeNormalize,
    /// MySQL `a/b` -> `a / NULLIF(b, 0)` with optional `CAST`.
    MySQLSafeDivide,
    /// Add `NULLS FIRST`/`NULLS LAST` for `ORDER BY`.
    NullsOrdering,
    /// `ALTER TABLE db.t1 RENAME TO db.t2` -> `ALTER TABLE db.t1 RENAME TO t2`.
    AlterTableRenameStripSchema,
    /// `STRING_AGG`/`WITHIN GROUP` -> target-specific aggregate.
    StringAggConvert,
    /// `GROUP_CONCAT` -> target-specific aggregate.
    GroupConcatConvert,
    /// TSQL `#table` -> temp table normalization.
    TempTableHash,
    /// `CARDINALITY`/`ARRAY_LENGTH`/`ARRAY_SIZE` -> target-specific.
    ArrayLengthConvert,
    /// `DATE_PART('month', x)` -> `DATE_PART(month, x)` for a Snowflake target.
    DatePartUnquote,
    /// Clear NVL `original_name` for cross-dialect transpilation.
    NvlClearOriginal,
    /// Hive/Spark `CAST` -> `TRY_CAST` for targets that support it.
    HiveCastToTryCast,
    /// MySQL `XOR` -> `(a AND NOT b) OR (NOT a AND b)` for non-XOR targets.
    XorExpand,
    /// `CAST(x AS TIMESTAMP WITH TIME ZONE)` -> `CAST(x AS TIMESTAMP)` for Hive/Spark.
    CastTimestampStripTz,
    /// `JSON_EXTRACT`/`JSON_EXTRACT_SCALAR` -> `GET_JSON_OBJECT` for Hive/Spark.
    JsonExtractToGetJsonObject,
    /// `JSON_EXTRACT_SCALAR` -> `GET_JSON_OBJECT` for Hive/Spark.
    JsonExtractScalarToGetJsonObject,
    /// `JsonQuery`/`JsonValue` -> target-specific (`ISNULL` wrapper for TSQL,
    /// `GET_JSON_OBJECT` for Spark, etc.).
    JsonQueryValueConvert,
    /// `JSON 'x'` -> `JSON_PARSE('x')` for Presto, `PARSE_JSON` for Snowflake;
    /// also DuckDB `CAST(x AS JSON)`.
    JsonLiteralToJsonParse,
    /// DuckDB `CAST(x AS JSON)` -> `CAST(x AS VARIANT)` for Snowflake.
    DuckDBCastJsonToVariant,
    /// DuckDB `TRY_CAST(x AS JSON)` -> `TRY(JSON_PARSE(x))` for Trino/Presto/Athena.
    DuckDBTryCastJsonToTryJsonParse,
    /// DuckDB `json(x)` -> `JSON_PARSE(x)` for Trino/Presto/Athena.
    DuckDBJsonFuncToJsonParse,
    /// DuckDB `json_valid(x)` -> `x IS JSON` for Trino/Presto/Athena.
    DuckDBJsonValidToIsJson,
    /// `ARRAY[x]` -> `ARRAY(x)` for Spark, `[x]` for BigQuery/DuckDB.
    ArraySyntaxConvert,
    /// `AT TIME ZONE` -> `AT_TIMEZONE` (Presto) / `FROM_UTC_TIMESTAMP` (Spark).
    AtTimeZoneConvert,
    /// `DAY_OF_WEEK` -> dialect-specific.
    DayOfWeekConvert,
    /// `MAX_BY`/`MIN_BY` -> `argMax`/`argMin` for ClickHouse.
    MaxByMinByConvert,
    /// `ARRAY_AGG(x ORDER BY ...)` -> `COLLECT_LIST(x)` for Hive/Spark.
    ArrayAggToCollectList,
    /// `ARRAY_AGG(x)` -> `GROUP_CONCAT(x)` for MySQL-like targets.
    ArrayAggToGroupConcat,
    /// `ELEMENT_AT(arr, idx)` -> `arr[idx]` for PostgreSQL,
    /// `arr[SAFE_ORDINAL(idx)]` for BigQuery.
    ElementAtConvert,
    /// `CURRENT_USER` -> `CURRENT_USER()` for Snowflake.
    CurrentUserParens,
    /// `CAST(x AS JSON)` -> `TO_JSON(x)` for Spark.
    CastToJsonForSpark,
    /// `CAST(JSON_PARSE(literal) AS ARRAY/MAP)` -> `FROM_JSON(literal, type_string)`.
    CastJsonToFromJson,
    /// `TO_JSON(x)` -> `JSON_FORMAT(CAST(x AS JSON))` for Presto etc.
    ToJsonConvert,
    /// `ARRAY_AGG(x) FILTER(WHERE cond)` -> add `AND NOT x IS NULL` for DuckDB.
    ArrayAggNullFilter,
    /// `ARRAY_AGG(x IGNORE NULLS ORDER BY ...)` ->
    /// `ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)` for DuckDB.
    ArrayAggIgnoreNullsDuckDB,
    /// `PERCENTILE_CONT(x, frac RESPECT NULLS)` -> `QUANTILE_CONT(x, frac)` for DuckDB.
    BigQueryPercentileContToDuckDB,
    /// `ARRAY(SELECT AS STRUCT ...)` -> `(SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))`.
    BigQueryArraySelectAsStructToSnowflake,
    /// `COUNT(DISTINCT a, b)` -> `COUNT(DISTINCT CASE WHEN ... END)`.
    CountDistinctMultiArg,
    /// `Expression::Variance` -> `varSamp` for ClickHouse.
    VarianceToClickHouse,
    /// `Expression::Stddev` -> `stddevSamp` for ClickHouse.
    StddevToClickHouse,
    /// `Expression::ApproxQuantile` -> `APPROX_PERCENTILE` for Snowflake.
    ApproxQuantileConvert,
    /// `array[1]` -> `array[0]` for BigQuery (1-based to 0-based).
    ArrayIndexConvert,
    /// `$foo` -> `@foo` for BigQuery.
    DollarParamConvert,
    /// `TABLESAMPLE (n ROWS)` -> `TABLESAMPLE RESERVOIR (n ROWS)` for DuckDB.
    TablesampleReservoir,
    /// `BIT_OR`/`BIT_AND`/`BIT_XOR` float arg -> `CAST(ROUND(CAST(arg)) AS INT)` for DuckDB.
    BitAggFloatCast,
    /// `BIT_OR` -> `BITORAGG`, `BIT_AND` -> `BITANDAGG` etc. for Snowflake.
    BitAggSnowflakeRename,
    /// `CAST TIMESTAMP` -> `TIMESTAMP_NTZ` for Spark in `STRFTIME`.
    StrftimeCastTimestamp,
    /// `ANY_VALUE(x)` -> `ANY_VALUE(x) IGNORE NULLS` for Spark.
    AnyValueIgnoreNulls,
    /// Strip `COMMENT` column constraint, `USING`, `PARTITIONED BY` for DuckDB.
    CreateTableStripComment,
    /// `e'Hello\nworld'` literal newline -> `\n`.
    EscapeStringNormalize,
    /// PostgreSQL `x <op> ANY(array)` -> `EXISTS(array, x -> ...)`.
    AnyToExists,
    /// `[1,2]` -> `ARRAY[1,2]` for PostgreSQL in `ARRAY_CAT`.
    ArrayConcatBracketConvert,
    /// `INTERVAL '2' HOUR` -> `INTERVAL '2 HOUR'` for Snowflake.
    SnowflakeIntervalFormat,
    /// `ALTER TABLE RENAME` -> `EXEC sp_rename` for TSQL.
    AlterTableToSpRename,
    /// `STRAIGHT_JOIN` -> `straight_join` for DuckDB.
    StraightJoinCase,
    /// `RESPECT NULLS` window function handling.
    RespectNullsConvert,
    /// MySQL doesn't support NULLS ordering.
    MysqlNullsOrdering,
    /// Add `CASE WHEN` to `ORDER BY` for DuckDB -> MySQL (`NULLS LAST` simulation).
    MysqlNullsLastRewrite,
    /// BigQuery doesn't support `NULLS FIRST`/`NULLS LAST` - strip.
    BigQueryNullsOrdering,
    /// Protect `FLOAT` from being converted to `DOUBLE` by the Snowflake target transform.
    SnowflakeFloatProtect,
    /// JSON arrow -> `GET_PATH`/`PARSE_JSON` for Snowflake.
    JsonToGetPath,
    /// `FILTER(WHERE)` -> `IFF` wrapping for Snowflake.
    FilterToIff,
    /// `AggFunc.filter` -> `IFF` wrapping for Snowflake
    /// (e.g., `AVG(x) FILTER(WHERE cond)`).
    AggFilterToIff,
    /// DuckDB struct -> Presto `ROW` / BigQuery `STRUCT`.
    StructToRow,
    /// Spark `STRUCT(x AS col1, ...)` -> `ROW`/DuckDB struct.
    SparkStructConvert,
    /// `DECIMAL` -> `DECIMAL(18, 3)` for Snowflake in BIT agg.
    DecimalDefaultPrecision,
    /// `APPROX_COUNT_DISTINCT` -> `APPROX_DISTINCT` for Presto/Trino.
    ApproxCountDistinctToApproxDistinct,
    /// `COLLECT_LIST` -> `ARRAY_AGG` for Presto/DuckDB.
    CollectListToArrayAgg,
    /// `COLLECT_SET` -> `SET_AGG`/`ARRAY_AGG(DISTINCT)`/`ARRAY_UNIQUE_AGG`.
    CollectSetConvert,
    /// `PERCENTILE` -> `QUANTILE`/`APPROX_PERCENTILE`.
    PercentileConvert,
    /// `CORR(a,b)` -> `CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END`.
    CorrIsnanWrap,
    /// `TRUNC(ts, unit)` -> `DATE_TRUNC(unit, ts)`.
    TruncToDateTrunc,
    /// `ARRAY_CONTAINS` -> `CONTAINS`/target-specific.
    ArrayContainsConvert,
    /// `StrPosition` with position -> complex `STRPOS` expansion for Presto/DuckDB.
    StrPositionExpand,
    /// Strip method and `PERCENT` for a Snowflake target.
    TablesampleSnowflakeStrip,
    /// `FIRST(col) IGNORE NULLS` -> `ANY_VALUE(col)` for DuckDB.
    FirstToAnyValue,
    /// `Expression::MonthsBetween` -> target-specific.
    MonthsBetweenConvert,
    /// `CURRENT_USER` -> `CURRENT_USER()` for Spark.
    CurrentUserSparkParens,
    /// `MONTH/YEAR/DAY('str')` -> `MONTH/YEAR/DAY(CAST('str' AS DATE))` from Spark.
    SparkDateFuncCast,
    /// `Expression::MapFromArrays` -> `MAP`/`OBJECT_CONSTRUCT`/`MAP_FROM_ARRAYS`.
    MapFromArraysConvert,
    /// `Expression::AddMonths` -> target-specific `DATEADD`/`DATE_ADD`.
    AddMonthsConvert,
    /// `PERCENTILE_CONT`/`DISC` `WITHIN GROUP` -> `APPROX_PERCENTILE`/`PERCENTILE_APPROX`.
    PercentileContConvert,
    /// `GENERATE_SERIES` -> `SEQUENCE`/`UNNEST(SEQUENCE)`/`EXPLODE(SEQUENCE)`.
    GenerateSeriesConvert,
    /// `CONCAT(a, b)` -> `CONCAT(COALESCE(CAST(a), ''), ...)` for Presto/ClickHouse.
    ConcatCoalesceWrap,
    /// `a || b` -> `CONCAT(CAST(a), CAST(b))` for Presto.
    PipeConcatToConcat,
    /// `DIV(a, b)` -> `a // b` for DuckDB, `CAST` for BigQuery.
    DivFuncConvert,
    /// `JSON_OBJECT_AGG` -> `JSON_GROUP_OBJECT` for DuckDB.
    JsonObjectAggConvert,
    /// `JSONB_EXISTS` -> `JSON_EXISTS` for DuckDB.
    JsonbExistsConvert,
    /// `DATE_BIN` -> `TIME_BUCKET` for DuckDB.
    DateBinConvert,
    /// MySQL `CAST(x AS CHAR)` -> `CAST(x AS TEXT/VARCHAR/STRING)` for targets.
    MysqlCastCharToText,
    /// Spark `CAST(x AS VARCHAR/CHAR)` -> `CAST(x AS STRING)` for Spark targets.
    SparkCastVarcharToString,
    /// `JSON_EXTRACT(x, path)` -> `x -> path` for SQLite/DuckDB.
    JsonExtractToArrow,
    /// `JSON_EXTRACT`/`JSON_EXTRACT_SCALAR` -> `ISNULL(JSON_QUERY, JSON_VALUE)` for TSQL.
    JsonExtractToTsql,
    /// `JSON_EXTRACT`/`JSON_EXTRACT_SCALAR` -> `JSONExtractString` for ClickHouse.
    JsonExtractToClickHouse,
    /// `JSON_EXTRACT_SCALAR` -> target-specific (PostgreSQL, Snowflake, SQLite).
    JsonExtractScalarConvert,
    /// Normalize JSON path format (brackets, wildcards, quotes) for various dialects.
    JsonPathNormalize,
    /// Multi-arg `MIN(a,b,c)` -> `LEAST(a,b,c)`, `MAX(a,b,c)` -> `GREATEST(a,b,c)`.
    MinMaxToLeastGreatest,
    /// `uniq(x)` -> `APPROX_COUNT_DISTINCT(x)` for non-ClickHouse targets.
    ClickHouseUniqToApproxCountDistinct,
    /// `any(x)` -> `ANY_VALUE(x)` for non-ClickHouse targets.
    ClickHouseAnyToAnyValue,
    /// `VARCHAR2(N CHAR/BYTE)` -> `VARCHAR(N)` for non-Oracle targets.
    OracleVarchar2ToVarchar,
    /// `NVL2(a, b, c)` -> `CASE WHEN NOT a IS NULL THEN b ELSE c END`.
    Nvl2Expand,
    /// `IFNULL(a, b)` -> `COALESCE(a, b)`.
    IfnullToCoalesce,
    /// `IS_ASCII(x)` -> dialect-specific ASCII check.
    IsAsciiConvert,
    /// `STR_POSITION(haystack, needle[, pos])` -> dialect-specific.
    StrPositionConvert,
    /// `DECODE` with null-safe -> simple `=` comparison.
    DecodeSimplify,
    /// `ARRAY_SUM` -> target-specific.
    ArraySumConvert,
    /// `ARRAY_SIZE` -> target-specific.
    ArraySizeConvert,
    /// `ARRAY_ANY` -> target-specific.
    ArrayAnyConvert,
    /// `CAST(x AS TIMESTAMPTZ)` -> `TIMESTAMP(x)` for MySQL/StarRocks.
    CastTimestamptzToFunc,
    /// `TS_OR_DS_TO_DATE(x[, fmt])` -> dialect-specific.
    TsOrDsToDateConvert,
    /// `TS_OR_DS_TO_DATE_STR(x)` -> `SUBSTRING(CAST(x AS type), 1, 10)`.
    TsOrDsToDateStrConvert,
    /// `DATE_STR_TO_DATE(x)` -> `CAST(x AS DATE)`.
    DateStrToDateConvert,
    /// `TIME_STR_TO_DATE(x)` -> `CAST(x AS DATE)`.
    TimeStrToDateConvert,
    /// `TIME_STR_TO_TIME(x)` -> `CAST(x AS TIMESTAMP)`.
    TimeStrToTimeConvert,
    /// `DATE_TO_DATE_STR(x)` -> `CAST(x AS TEXT/VARCHAR/STRING)`.
    DateToDateStrConvert,
    /// `DATE_TO_DI(x)` -> dialect-specific (`CAST` date to YYYYMMDD integer).
    DateToDiConvert,
    /// `DI_TO_DATE(x)` -> dialect-specific (integer YYYYMMDD to date).
    DiToDateConvert,
    /// `TS_OR_DI_TO_DI(x)` -> dialect-specific.
    TsOrDiToDiConvert,
    /// `UNIX_TO_STR(x, fmt)` -> dialect-specific.
    UnixToStrConvert,
    /// `UNIX_TO_TIME(x)` -> dialect-specific.
    UnixToTimeConvert,
    /// `UNIX_TO_TIME_STR(x)` -> dialect-specific.
    UnixToTimeStrConvert,
    /// `TIME_TO_UNIX(x)` -> dialect-specific.
    TimeToUnixConvert,
    /// `TIME_TO_STR(x, fmt)` -> dialect-specific.
    TimeToStrConvert,
    /// `STR_TO_UNIX(x, fmt)` -> dialect-specific.
    StrToUnixConvert,
    /// `DATE_TRUNC('unit', x)` -> `DATE_TRUNC(x, unit)` / `TRUNC(x, unit)`.
    DateTruncSwapArgs,
    /// `TIMESTAMP_TRUNC(x, UNIT[, tz])` -> dialect-specific.
    TimestampTruncConvert,
    /// `STR_TO_DATE(x, fmt)` from Generic -> `CAST(StrToTime(x,fmt) AS DATE)`.
    StrToDateConvert,
    /// `TS_OR_DS_ADD(x, n, 'UNIT')` from Generic -> `DATE_ADD` per dialect.
    TsOrDsAddConvert,
    /// `DATE_FROM_UNIX_DATE(n)` -> `DATEADD(DAY, n, '1970-01-01')`.
    DateFromUnixDateConvert,
    /// `TIME_STR_TO_UNIX(x)` -> dialect-specific.
    TimeStrToUnixConvert,
    /// `TIME_TO_TIME_STR(x)` -> `CAST(x AS type)`.
    TimeToTimeStrConvert,
    /// `CREATE TABLE a LIKE b` -> `CREATE TABLE a AS SELECT * FROM b LIMIT 0`.
    CreateTableLikeToCtas,
    /// `CREATE TABLE a LIKE b` -> `SELECT TOP 0 * INTO a FROM b AS temp`.
    CreateTableLikeToSelectInto,
    /// `CREATE TABLE a LIKE b` -> `CREATE TABLE a AS b` (ClickHouse).
    CreateTableLikeToAs,
    /// `ARRAY_REMOVE(arr, target)` -> `LIST_FILTER`/`arrayFilter`/`ARRAY` subquery.
    ArrayRemoveConvert,
    /// `ARRAY_REVERSE(x)` -> `arrayReverse(x)` for ClickHouse.
    ArrayReverseConvert,
    /// `JSON_KEYS` -> `JSON_OBJECT_KEYS`/`OBJECT_KEYS`.
    JsonKeysConvert,
    /// `PARSE_JSON(x)` -> `x` (strip wrapper).
    ParseJsonStrip,
    /// `ARRAY_SIZE` -> `REPEATED_COUNT` for Drill.
    ArraySizeDrill,
    /// `WEEKOFYEAR` -> `WEEKISO` for Snowflake cross-dialect.
    WeekOfYearToWeekIso,
    /// `REGEXP_SUBSTR(s, p, ...)` -> `REGEXP_EXTRACT` variants for DuckDB.
    RegexpSubstrSnowflakeToDuckDB,
    /// `REGEXP_SUBSTR`/`REGEXP_SUBSTR_ALL`: strip trailing group=0 for Snowflake identity.
    RegexpSubstrSnowflakeIdentity,
    /// `REGEXP_SUBSTR_ALL(s, p, ...)` -> `REGEXP_EXTRACT_ALL` variants for DuckDB.
    RegexpSubstrAllSnowflakeToDuckDB,
    /// `REGEXP_COUNT(s, p, ...)` -> `LENGTH(REGEXP_EXTRACT_ALL(...))` for DuckDB.
    RegexpCountSnowflakeToDuckDB,
    /// `REGEXP_INSTR(s, p, ...)` -> complex `CASE` expression for DuckDB.
    RegexpInstrSnowflakeToDuckDB,
    /// `REGEXP_REPLACE(s, p, r, pos, occ)` -> DuckDB form.
    RegexpReplacePositionSnowflakeToDuckDB,
    /// `RLIKE(a, b[, flags])` -> `REGEXP_FULL_MATCH(a, b[, flags])` for DuckDB.
    RlikeSnowflakeToDuckDB,
    /// BigQuery `REGEXP_EXTRACT_ALL` -> `REGEXP_SUBSTR_ALL` for Snowflake.
    RegexpExtractAllToSnowflake,
    /// `ARRAY_EXCEPT` -> DuckDB complex `CASE` / Snowflake `ARRAY_EXCEPT` /
    /// Presto `ARRAY_EXCEPT`.
    ArrayExceptConvert,
    /// `ARRAY_POSITION(arr, elem)` -> `ARRAY_POSITION(elem, arr)` for Snowflake.
    ArrayPositionSnowflakeSwap,
    /// `RegexpLike` -> Exasol `REGEXP_LIKE` with `.*pattern.*` anchoring.
    RegexpLikeExasolAnchor,
    /// `ARRAY_DISTINCT` -> DuckDB `LIST_DISTINCT` with NULL-aware `CASE`.
    ArrayDistinctConvert,
    /// `ARRAY_DISTINCT` -> `arrayDistinct` for ClickHouse.
    ArrayDistinctClickHouse,
    /// `ARRAY_CONTAINS` -> DuckDB `CASE` with NULL-aware check.
    ArrayContainsDuckDBConvert,
    /// Strip default `ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`
    /// for a Snowflake target.
    SnowflakeWindowFrameStrip,
    /// Add default `ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`
    /// for a non-Snowflake target.
    SnowflakeWindowFrameAdd,
    /// `ARRAY_POSITION(val, arr)` -> `ARRAY_POSITION(arr, val) - 1` for DuckDB.
    SnowflakeArrayPositionToDuckDB,
}
5676
5677 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
5678 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
5679 Self::transform_select_into(expr, source, target)
5680 } else {
5681 expr
5682 };
5683
5684 // Strip OFFSET ROWS for non-TSQL/Oracle targets
5685 let expr = if !matches!(
5686 target,
5687 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
5688 ) {
5689 if let Expression::Select(mut select) = expr {
5690 if let Some(ref mut offset) = select.offset {
5691 offset.rows = None;
5692 }
5693 Expression::Select(select)
5694 } else {
5695 expr
5696 }
5697 } else {
5698 expr
5699 };
5700
5701 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
5702 let expr = if matches!(target, DialectType::Oracle) {
5703 if let Expression::Select(mut select) = expr {
5704 if let Some(limit) = select.limit.take() {
5705 // Convert LIMIT to FETCH FIRST n ROWS ONLY
5706 select.fetch = Some(crate::expressions::Fetch {
5707 direction: "FIRST".to_string(),
5708 count: Some(limit.this),
5709 percent: false,
5710 rows: true,
5711 with_ties: false,
5712 });
5713 }
5714 // Add ROWS to OFFSET if present
5715 if let Some(ref mut offset) = select.offset {
5716 offset.rows = Some(true);
5717 }
5718 Expression::Select(select)
5719 } else {
5720 expr
5721 }
5722 } else {
5723 expr
5724 };
5725
5726 // Handle CreateTable WITH properties transformation before recursive transforms
5727 let expr = if let Expression::CreateTable(mut ct) = expr {
5728 Self::transform_create_table_properties(&mut ct, source, target);
5729
5730 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
5731 // When the PARTITIONED BY clause contains column definitions, merge them into the
5732 // main column list and adjust the PARTITIONED BY clause for the target dialect.
5733 if matches!(
5734 source,
5735 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5736 ) {
5737 let mut partition_col_names: Vec<String> = Vec::new();
5738 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
5739 let mut has_col_def_partitions = false;
5740
5741 // Check if any PARTITIONED BY property contains ColumnDef expressions
5742 for prop in &ct.properties {
5743 if let Expression::PartitionedByProperty(ref pbp) = prop {
5744 if let Expression::Tuple(ref tuple) = *pbp.this {
5745 for expr in &tuple.expressions {
5746 if let Expression::ColumnDef(ref cd) = expr {
5747 has_col_def_partitions = true;
5748 partition_col_names.push(cd.name.name.clone());
5749 partition_col_defs.push(*cd.clone());
5750 }
5751 }
5752 }
5753 }
5754 }
5755
5756 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
5757 // Merge partition columns into main column list
5758 for cd in partition_col_defs {
5759 ct.columns.push(cd);
5760 }
5761
5762 // Replace PARTITIONED BY property with column-name-only version
5763 ct.properties
5764 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
5765
5766 if matches!(
5767 target,
5768 DialectType::Presto | DialectType::Trino | DialectType::Athena
5769 ) {
5770 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
5771 let array_elements: Vec<String> = partition_col_names
5772 .iter()
5773 .map(|n| format!("'{}'", n))
5774 .collect();
5775 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
5776 ct.with_properties
5777 .push(("PARTITIONED_BY".to_string(), array_value));
5778 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5779 // Spark: PARTITIONED BY (y, z) - just column names
5780 let name_exprs: Vec<Expression> = partition_col_names
5781 .iter()
5782 .map(|n| {
5783 Expression::Column(Box::new(crate::expressions::Column {
5784 name: crate::expressions::Identifier::new(n.clone()),
5785 table: None,
5786 join_mark: false,
5787 trailing_comments: Vec::new(),
5788 span: None,
5789 inferred_type: None,
5790 }))
5791 })
5792 .collect();
5793 ct.properties.insert(
5794 0,
5795 Expression::PartitionedByProperty(Box::new(
5796 crate::expressions::PartitionedByProperty {
5797 this: Box::new(Expression::Tuple(Box::new(
5798 crate::expressions::Tuple {
5799 expressions: name_exprs,
5800 },
5801 ))),
5802 },
5803 )),
5804 );
5805 }
5806 // For DuckDB and other targets, nothing is re-added: the PARTITIONED BY property was already removed by the retain() call above
5807 }
5808
5809 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
5810 // are handled by transform_create_table_properties which runs first
5811 }
5812
5813 // Strip LOCATION property for Presto/Trino (not supported)
5814 if matches!(
5815 target,
5816 DialectType::Presto | DialectType::Trino | DialectType::Athena
5817 ) {
5818 ct.properties
5819 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
5820 }
5821
5822 // Strip table-level constraints for Spark/Hive/Databricks
5823 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
5824 if matches!(
5825 target,
5826 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5827 ) {
5828 ct.constraints.retain(|c| {
5829 matches!(
5830 c,
5831 crate::expressions::TableConstraint::PrimaryKey { .. }
5832 | crate::expressions::TableConstraint::Like { .. }
5833 )
5834 });
5835 for constraint in &mut ct.constraints {
5836 if let crate::expressions::TableConstraint::PrimaryKey {
5837 columns,
5838 modifiers,
5839 ..
5840 } = constraint
5841 {
5842 // Strip ASC/DESC from column names
5843 for col in columns.iter_mut() {
5844 if col.name.ends_with(" ASC") {
5845 col.name = col.name[..col.name.len() - 4].to_string();
5846 } else if col.name.ends_with(" DESC") {
5847 col.name = col.name[..col.name.len() - 5].to_string();
5848 }
5849 }
5850 // Strip TSQL-specific modifiers
5851 modifiers.clustered = None;
5852 modifiers.with_options.clear();
5853 modifiers.on_filegroup = None;
5854 }
5855 }
5856 }
5857
5858 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
5859 if matches!(target, DialectType::Databricks) {
5860 for col in &mut ct.columns {
5861 if col.auto_increment {
5862 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
5863 col.data_type = crate::expressions::DataType::BigInt { length: None };
5864 }
5865 }
5866 }
5867 }
5868
5869 // Spark/Databricks: INTEGER -> INT in column definitions
5870 // Python sqlglot always outputs INT for Spark/Databricks
5871 if matches!(target, DialectType::Spark | DialectType::Databricks) {
5872 for col in &mut ct.columns {
5873 if let crate::expressions::DataType::Int {
5874 integer_spelling, ..
5875 } = &mut col.data_type
5876 {
5877 *integer_spelling = false;
5878 }
5879 }
5880 }
5881
5882 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
5883 if matches!(target, DialectType::Hive | DialectType::Spark) {
5884 for col in &mut ct.columns {
5885 // If nullable is explicitly true (NULL), change to None (omit it)
5886 if col.nullable == Some(true) {
5887 col.nullable = None;
5888 }
5889 // Also remove from constraints if stored there
5890 col.constraints
5891 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
5892 }
5893 }
5894
5895 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
5896 if ct.on_property.is_some()
5897 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
5898 {
5899 ct.on_property = None;
5900 }
5901
5902 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
5903 // Snowflake doesn't support typed arrays in DDL
5904 if matches!(target, DialectType::Snowflake) {
5905 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
5906 if let crate::expressions::DataType::Array { .. } = dt {
5907 *dt = crate::expressions::DataType::Custom {
5908 name: "ARRAY".to_string(),
5909 };
5910 }
5911 }
5912 for col in &mut ct.columns {
5913 strip_array_type_params(&mut col.data_type);
5914 }
5915 }
5916
5917 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
5918 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
5919 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
5920 if matches!(target, DialectType::PostgreSQL) {
5921 for col in &mut ct.columns {
5922 if col.auto_increment && !col.constraint_order.is_empty() {
5923 use crate::expressions::ConstraintType;
5924 let has_explicit_not_null = col
5925 .constraint_order
5926 .iter()
5927 .any(|ct| *ct == ConstraintType::NotNull);
5928
5929 if has_explicit_not_null {
5930 // Source had explicit NOT NULL - preserve original order
5931 // Just ensure nullable is set
5932 if col.nullable != Some(false) {
5933 col.nullable = Some(false);
5934 }
5935 } else {
5936 // Source didn't have explicit NOT NULL - build order with
5937 // AutoIncrement + NotNull first, then remaining constraints
5938 let mut new_order = Vec::new();
5939 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
5940 new_order.push(ConstraintType::AutoIncrement);
5941 new_order.push(ConstraintType::NotNull);
5942 // Add remaining constraints in original order (except AutoIncrement)
5943 for ct_type in &col.constraint_order {
5944 if *ct_type != ConstraintType::AutoIncrement {
5945 new_order.push(ct_type.clone());
5946 }
5947 }
5948 col.constraint_order = new_order;
5949 col.nullable = Some(false);
5950 }
5951 }
5952 }
5953 }
5954
5955 Expression::CreateTable(ct)
5956 } else {
5957 expr
5958 };
5959
5960 // Handle CreateView column stripping for Presto/Trino target
5961 let expr = if let Expression::CreateView(mut cv) = expr {
5962 // Presto/Trino: drop column list when view has a SELECT body
5963 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
5964 {
5965 if !matches!(&cv.query, Expression::Null(_)) {
5966 cv.columns.clear();
5967 }
5968 }
5969 Expression::CreateView(cv)
5970 } else {
5971 expr
5972 };
5973
5974 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
5975 let expr = if !matches!(
5976 target,
5977 DialectType::Presto | DialectType::Trino | DialectType::Athena
5978 ) {
5979 if let Expression::Select(mut select) = expr {
5980 if let Some(ref mut with) = select.with {
5981 for cte in &mut with.ctes {
5982 if let Expression::Values(ref vals) = cte.this {
5983 // Build: SELECT * FROM (VALUES ...) AS _values
5984 let values_subquery =
5985 Expression::Subquery(Box::new(crate::expressions::Subquery {
5986 this: Expression::Values(vals.clone()),
5987 alias: Some(Identifier::new("_values".to_string())),
5988 column_aliases: Vec::new(),
5989 alias_explicit_as: false,
5990 alias_keyword: None,
5991 order_by: None,
5992 limit: None,
5993 offset: None,
5994 distribute_by: None,
5995 sort_by: None,
5996 cluster_by: None,
5997 lateral: false,
5998 modifiers_inside: false,
5999 trailing_comments: Vec::new(),
6000 inferred_type: None,
6001 }));
6002 let mut new_select = crate::expressions::Select::new();
6003 new_select.expressions =
6004 vec![Expression::Star(crate::expressions::Star {
6005 table: None,
6006 except: None,
6007 replace: None,
6008 rename: None,
6009 trailing_comments: Vec::new(),
6010 span: None,
6011 })];
6012 new_select.from = Some(crate::expressions::From {
6013 expressions: vec![values_subquery],
6014 });
6015 cte.this = Expression::Select(Box::new(new_select));
6016 }
6017 }
6018 }
6019 Expression::Select(select)
6020 } else {
6021 expr
6022 }
6023 } else {
6024 expr
6025 };
6026
6027 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
6028 let expr = if matches!(target, DialectType::PostgreSQL) {
6029 if let Expression::CreateIndex(mut ci) = expr {
6030 for col in &mut ci.columns {
6031 if col.nulls_first.is_none() {
6032 col.nulls_first = Some(true);
6033 }
6034 }
6035 Expression::CreateIndex(ci)
6036 } else {
6037 expr
6038 }
6039 } else {
6040 expr
6041 };
6042
6043 transform_recursive(expr, &|e| {
6044 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
6045 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
6046 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6047 if let Expression::Cast(ref c) = e {
6048 // Check if this is a CAST of an array to a struct array type
6049 let is_struct_array_cast =
6050 matches!(&c.to, crate::expressions::DataType::Array { .. });
6051 if is_struct_array_cast {
6052 let has_auto_named_structs = match &c.this {
6053 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
6054 if let Expression::Struct(s) = elem {
6055 s.fields.iter().all(|(name, _)| {
6056 name.as_ref().map_or(true, |n| {
6057 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
6058 })
6059 })
6060 } else {
6061 false
6062 }
6063 }),
6064 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
6065 if let Expression::Struct(s) = elem {
6066 s.fields.iter().all(|(name, _)| {
6067 name.as_ref().map_or(true, |n| {
6068 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
6069 })
6070 })
6071 } else {
6072 false
6073 }
6074 }),
6075 _ => false,
6076 };
6077 if has_auto_named_structs {
6078 let convert_struct_to_row = |elem: Expression| -> Expression {
6079 if let Expression::Struct(s) = elem {
6080 let row_args: Vec<Expression> =
6081 s.fields.into_iter().map(|(_, v)| v).collect();
6082 Expression::Function(Box::new(Function::new(
6083 "ROW".to_string(),
6084 row_args,
6085 )))
6086 } else {
6087 elem
6088 }
6089 };
6090 let mut c_clone = c.as_ref().clone();
6091 match &mut c_clone.this {
6092 Expression::Array(arr) => {
6093 arr.expressions = arr
6094 .expressions
6095 .drain(..)
6096 .map(convert_struct_to_row)
6097 .collect();
6098 }
6099 Expression::ArrayFunc(arr) => {
6100 arr.expressions = arr
6101 .expressions
6102 .drain(..)
6103 .map(convert_struct_to_row)
6104 .collect();
6105 }
6106 _ => {}
6107 }
6108 return Ok(Expression::Cast(Box::new(c_clone)));
6109 }
6110 }
6111 }
6112 }
6113
6114 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
6115 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6116 if let Expression::Select(ref sel) = e {
6117 if sel.kind.as_deref() == Some("STRUCT") {
6118 let mut fields = Vec::new();
6119 for expr in &sel.expressions {
6120 match expr {
6121 Expression::Alias(a) => {
6122 fields.push((Some(a.alias.name.clone()), a.this.clone()));
6123 }
6124 Expression::Column(c) => {
6125 fields.push((Some(c.name.name.clone()), expr.clone()));
6126 }
6127 _ => {
6128 fields.push((None, expr.clone()));
6129 }
6130 }
6131 }
6132 let struct_lit =
6133 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
6134 let mut new_select = sel.as_ref().clone();
6135 new_select.kind = None;
6136 new_select.expressions = vec![struct_lit];
6137 return Ok(Expression::Select(Box::new(new_select)));
6138 }
6139 }
6140 }
6141
6142 // Convert @variable -> ${variable} for Spark/Hive/Databricks
6143 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6144 && matches!(
6145 target,
6146 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6147 )
6148 {
6149 if let Expression::Parameter(ref p) = e {
6150 if p.style == crate::expressions::ParameterStyle::At {
6151 if let Some(ref name) = p.name {
6152 return Ok(Expression::Parameter(Box::new(
6153 crate::expressions::Parameter {
6154 name: Some(name.clone()),
6155 index: p.index,
6156 style: crate::expressions::ParameterStyle::DollarBrace,
6157 quoted: p.quoted,
6158 string_quoted: p.string_quoted,
6159 expression: None,
6160 },
6161 )));
6162 }
6163 }
6164 }
6165 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
6166 if let Expression::Column(ref col) = e {
6167 if col.name.name.starts_with('@') && col.table.is_none() {
6168 let var_name = col.name.name.trim_start_matches('@').to_string();
6169 return Ok(Expression::Parameter(Box::new(
6170 crate::expressions::Parameter {
6171 name: Some(var_name),
6172 index: None,
6173 style: crate::expressions::ParameterStyle::DollarBrace,
6174 quoted: false,
6175 string_quoted: false,
6176 expression: None,
6177 },
6178 )));
6179 }
6180 }
6181 }
6182
6183 // Convert @variable -> variable in SET statements for Spark/Databricks
6184 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6185 && matches!(target, DialectType::Spark | DialectType::Databricks)
6186 {
6187 if let Expression::SetStatement(ref s) = e {
6188 let mut new_items = s.items.clone();
6189 let mut changed = false;
6190 for item in &mut new_items {
6191 // Strip @ from the SET name (Parameter style)
6192 if let Expression::Parameter(ref p) = item.name {
6193 if p.style == crate::expressions::ParameterStyle::At {
6194 if let Some(ref name) = p.name {
6195 item.name = Expression::Identifier(Identifier::new(name));
6196 changed = true;
6197 }
6198 }
6199 }
6200 // Strip @ from the SET name (Identifier style - SET parser)
6201 if let Expression::Identifier(ref id) = item.name {
6202 if id.name.starts_with('@') {
6203 let var_name = id.name.trim_start_matches('@').to_string();
6204 item.name = Expression::Identifier(Identifier::new(&var_name));
6205 changed = true;
6206 }
6207 }
6208 // Strip @ from the SET name (Column style - alternative parsing)
6209 if let Expression::Column(ref col) = item.name {
6210 if col.name.name.starts_with('@') && col.table.is_none() {
6211 let var_name = col.name.name.trim_start_matches('@').to_string();
6212 item.name = Expression::Identifier(Identifier::new(&var_name));
6213 changed = true;
6214 }
6215 }
6216 }
6217 if changed {
6218 let mut new_set = (**s).clone();
6219 new_set.items = new_items;
6220 return Ok(Expression::SetStatement(Box::new(new_set)));
6221 }
6222 }
6223 }
6224
6225 // Strip all table hints (e.g. NOLOCK) when going from TSQL/Fabric to a non-TSQL/Fabric target
6226 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6227 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6228 {
6229 if let Expression::Table(ref tr) = e {
6230 if !tr.hints.is_empty() {
6231 let mut new_tr = tr.clone();
6232 new_tr.hints.clear();
6233 return Ok(Expression::Table(new_tr));
6234 }
6235 }
6236 }
6237
6238 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
6239 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
6240 if matches!(target, DialectType::Snowflake) {
6241 if let Expression::IsTrue(ref itf) = e {
6242 if let Expression::Boolean(ref b) = itf.this {
6243 if !itf.not {
6244 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6245 value: b.value,
6246 }));
6247 } else {
6248 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6249 value: !b.value,
6250 }));
6251 }
6252 }
6253 }
6254 if let Expression::IsFalse(ref itf) = e {
6255 if let Expression::Boolean(ref b) = itf.this {
6256 if !itf.not {
6257 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6258 value: !b.value,
6259 }));
6260 } else {
6261 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6262 value: b.value,
6263 }));
6264 }
6265 }
6266 }
6267 }
6268
6269 // BigQuery: split dotted backtick identifiers in table names
6270 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
6271 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
6272 if let Expression::CreateTable(ref ct) = e {
6273 let mut changed = false;
6274 let mut new_ct = ct.clone();
6275 // Split the table name
6276 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
6277 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
6278 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
6279 let was_quoted = ct.name.name.quoted;
6280 let mk_id = |s: &str| {
6281 if was_quoted {
6282 Identifier::quoted(s)
6283 } else {
6284 Identifier::new(s)
6285 }
6286 };
6287 if parts.len() == 3 {
6288 new_ct.name.catalog = Some(mk_id(parts[0]));
6289 new_ct.name.schema = Some(mk_id(parts[1]));
6290 new_ct.name.name = mk_id(parts[2]);
6291 changed = true;
6292 } else if parts.len() == 2 {
6293 new_ct.name.schema = Some(mk_id(parts[0]));
6294 new_ct.name.name = mk_id(parts[1]);
6295 changed = true;
6296 }
6297 }
6298 // Split the clone source name
6299 if let Some(ref clone_src) = ct.clone_source {
6300 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
6301 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
6302 let was_quoted = clone_src.name.quoted;
6303 let mk_id = |s: &str| {
6304 if was_quoted {
6305 Identifier::quoted(s)
6306 } else {
6307 Identifier::new(s)
6308 }
6309 };
6310 let mut new_src = clone_src.clone();
6311 if parts.len() == 3 {
6312 new_src.catalog = Some(mk_id(parts[0]));
6313 new_src.schema = Some(mk_id(parts[1]));
6314 new_src.name = mk_id(parts[2]);
6315 new_ct.clone_source = Some(new_src);
6316 changed = true;
6317 } else if parts.len() == 2 {
6318 new_src.schema = Some(mk_id(parts[0]));
6319 new_src.name = mk_id(parts[1]);
6320 new_ct.clone_source = Some(new_src);
6321 changed = true;
6322 }
6323 }
6324 }
6325 if changed {
6326 return Ok(Expression::CreateTable(new_ct));
6327 }
6328 }
6329 }
6330
6331 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
6332 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
6333 if matches!(source, DialectType::BigQuery)
6334 && matches!(
6335 target,
6336 DialectType::DuckDB
6337 | DialectType::Presto
6338 | DialectType::Trino
6339 | DialectType::Athena
6340 )
6341 {
6342 if let Expression::Subscript(ref sub) = e {
6343 let (new_index, is_safe) = match &sub.index {
6344 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
6345 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
6346 let Literal::Number(n) = lit.as_ref() else {
6347 unreachable!()
6348 };
6349 if let Ok(val) = n.parse::<i64>() {
6350 (
6351 Some(Expression::Literal(Box::new(Literal::Number(
6352 (val + 1).to_string(),
6353 )))),
6354 false,
6355 )
6356 } else {
6357 (None, false)
6358 }
6359 }
6360 // OFFSET(n) -> n+1 (0-based)
6361 Expression::Function(ref f)
6362 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
6363 {
6364 if let Expression::Literal(lit) = &f.args[0] {
6365 if let Literal::Number(n) = lit.as_ref() {
6366 if let Ok(val) = n.parse::<i64>() {
6367 (
6368 Some(Expression::Literal(Box::new(Literal::Number(
6369 (val + 1).to_string(),
6370 )))),
6371 false,
6372 )
6373 } else {
6374 (
6375 Some(Expression::Add(Box::new(
6376 crate::expressions::BinaryOp::new(
6377 f.args[0].clone(),
6378 Expression::number(1),
6379 ),
6380 ))),
6381 false,
6382 )
6383 }
6384 } else {
6385 (None, false)
6386 }
6387 } else {
6388 (
6389 Some(Expression::Add(Box::new(
6390 crate::expressions::BinaryOp::new(
6391 f.args[0].clone(),
6392 Expression::number(1),
6393 ),
6394 ))),
6395 false,
6396 )
6397 }
6398 }
6399 // ORDINAL(n) -> n (already 1-based)
6400 Expression::Function(ref f)
6401 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
6402 {
6403 (Some(f.args[0].clone()), false)
6404 }
6405 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
6406 Expression::Function(ref f)
6407 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
6408 {
6409 if let Expression::Literal(lit) = &f.args[0] {
6410 if let Literal::Number(n) = lit.as_ref() {
6411 if let Ok(val) = n.parse::<i64>() {
6412 (
6413 Some(Expression::Literal(Box::new(Literal::Number(
6414 (val + 1).to_string(),
6415 )))),
6416 true,
6417 )
6418 } else {
6419 (
6420 Some(Expression::Add(Box::new(
6421 crate::expressions::BinaryOp::new(
6422 f.args[0].clone(),
6423 Expression::number(1),
6424 ),
6425 ))),
6426 true,
6427 )
6428 }
6429 } else {
6430 (None, false)
6431 }
6432 } else {
6433 (
6434 Some(Expression::Add(Box::new(
6435 crate::expressions::BinaryOp::new(
6436 f.args[0].clone(),
6437 Expression::number(1),
6438 ),
6439 ))),
6440 true,
6441 )
6442 }
6443 }
6444 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
6445 Expression::Function(ref f)
6446 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
6447 {
6448 (Some(f.args[0].clone()), true)
6449 }
6450 _ => (None, false),
6451 };
6452 if let Some(idx) = new_index {
6453 if is_safe
6454 && matches!(
6455 target,
6456 DialectType::Presto | DialectType::Trino | DialectType::Athena
6457 )
6458 {
6459 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
6460 return Ok(Expression::Function(Box::new(Function::new(
6461 "ELEMENT_AT".to_string(),
6462 vec![sub.this.clone(), idx],
6463 ))));
6464 } else {
6465 // DuckDB or non-safe: just use subscript with converted index
6466 return Ok(Expression::Subscript(Box::new(
6467 crate::expressions::Subscript {
6468 this: sub.this.clone(),
6469 index: idx,
6470 },
6471 )));
6472 }
6473 }
6474 }
6475 }
6476
6477 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
6478 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6479 if let Expression::Length(ref uf) = e {
6480 let arg = uf.this.clone();
6481 let typeof_func = Expression::Function(Box::new(Function::new(
6482 "TYPEOF".to_string(),
6483 vec![arg.clone()],
6484 )));
6485 let blob_cast = Expression::Cast(Box::new(Cast {
6486 this: arg.clone(),
6487 to: DataType::VarBinary { length: None },
6488 trailing_comments: vec![],
6489 double_colon_syntax: false,
6490 format: None,
6491 default: None,
6492 inferred_type: None,
6493 }));
6494 let octet_length = Expression::Function(Box::new(Function::new(
6495 "OCTET_LENGTH".to_string(),
6496 vec![blob_cast],
6497 )));
6498 let text_cast = Expression::Cast(Box::new(Cast {
6499 this: arg,
6500 to: DataType::Text,
6501 trailing_comments: vec![],
6502 double_colon_syntax: false,
6503 format: None,
6504 default: None,
6505 inferred_type: None,
6506 }));
6507 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
6508 this: text_cast,
6509 original_name: None,
6510 inferred_type: None,
6511 }));
6512 return Ok(Expression::Case(Box::new(Case {
6513 operand: Some(typeof_func),
6514 whens: vec![(
6515 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
6516 octet_length,
6517 )],
6518 else_: Some(length_text),
6519 comments: Vec::new(),
6520 inferred_type: None,
6521 })));
6522 }
6523 }
6524
6525 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
6526 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
6527 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
6528 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
6529 if let Expression::Alias(ref a) = e {
6530 if matches!(&a.this, Expression::Unnest(_)) {
6531 if a.column_aliases.is_empty() {
6532 // Drop the entire alias, return just the UNNEST expression
6533 return Ok(a.this.clone());
6534 } else {
6535 // Use first column alias as the main alias
6536 let mut new_alias = a.as_ref().clone();
6537 new_alias.alias = a.column_aliases[0].clone();
6538 new_alias.column_aliases.clear();
6539 return Ok(Expression::Alias(Box::new(new_alias)));
6540 }
6541 }
6542 }
6543 }
6544
6545 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
6546 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
6547 if let Expression::In(ref in_expr) = e {
6548 if let Some(ref unnest_inner) = in_expr.unnest {
6549 // Build the function call for the target dialect
6550 let func_expr = if matches!(
6551 target,
6552 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6553 ) {
6554 // Use EXPLODE for Hive/Spark
6555 Expression::Function(Box::new(Function::new(
6556 "EXPLODE".to_string(),
6557 vec![*unnest_inner.clone()],
6558 )))
6559 } else {
6560 // Use UNNEST for Presto/Trino/DuckDB/etc.
6561 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
6562 this: *unnest_inner.clone(),
6563 expressions: Vec::new(),
6564 with_ordinality: false,
6565 alias: None,
6566 offset_alias: None,
6567 }))
6568 };
6569
6570 // Wrap in SELECT
6571 let mut inner_select = crate::expressions::Select::new();
6572 inner_select.expressions = vec![func_expr];
6573
6574 let subquery_expr = Expression::Select(Box::new(inner_select));
6575
6576 return Ok(Expression::In(Box::new(crate::expressions::In {
6577 this: in_expr.this.clone(),
6578 expressions: Vec::new(),
6579 query: Some(subquery_expr),
6580 not: in_expr.not,
6581 global: in_expr.global,
6582 unnest: None,
6583 is_field: false,
6584 })));
6585 }
6586 }
6587 }
6588
6589 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
6590 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
6591 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
6592 if let Expression::Alias(ref a) = e {
6593 if let Expression::Function(ref f) = a.this {
6594 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
6595 && !a.column_aliases.is_empty()
6596 {
6597 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
6598 let col_alias = a.column_aliases[0].clone();
6599 let mut inner_select = crate::expressions::Select::new();
6600 inner_select.expressions =
6601 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
6602 Expression::Identifier(Identifier::new("value".to_string())),
6603 col_alias,
6604 )))];
6605 inner_select.from = Some(crate::expressions::From {
6606 expressions: vec![a.this.clone()],
6607 });
6608 let subquery =
6609 Expression::Subquery(Box::new(crate::expressions::Subquery {
6610 this: Expression::Select(Box::new(inner_select)),
6611 alias: Some(a.alias.clone()),
6612 column_aliases: Vec::new(),
6613 alias_explicit_as: false,
6614 alias_keyword: None,
6615 order_by: None,
6616 limit: None,
6617 offset: None,
6618 lateral: false,
6619 modifiers_inside: false,
6620 trailing_comments: Vec::new(),
6621 distribute_by: None,
6622 sort_by: None,
6623 cluster_by: None,
6624 inferred_type: None,
6625 }));
6626 return Ok(subquery);
6627 }
6628 }
6629 }
6630 }
6631
6632 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
6633 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
6634 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
6635 if matches!(source, DialectType::BigQuery) {
6636 if let Expression::Select(ref s) = e {
6637 if let Some(ref from) = s.from {
6638 if from.expressions.len() >= 2 {
6639 // Collect table names from first expression
6640 let first_tables: Vec<String> = from
6641 .expressions
6642 .iter()
6643 .take(1)
6644 .filter_map(|expr| {
6645 if let Expression::Table(t) = expr {
6646 Some(t.name.name.to_ascii_lowercase())
6647 } else {
6648 None
6649 }
6650 })
6651 .collect();
6652
6653 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
6654 // or have a dotted name matching a table
6655 let mut needs_rewrite = false;
6656 for expr in from.expressions.iter().skip(1) {
6657 if let Expression::Table(t) = expr {
6658 if let Some(ref schema) = t.schema {
6659 if first_tables.contains(&schema.name.to_ascii_lowercase())
6660 {
6661 needs_rewrite = true;
6662 break;
6663 }
6664 }
6665 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
6666 if t.schema.is_none() && t.name.name.contains('.') {
6667 let parts: Vec<&str> = t.name.name.split('.').collect();
6668 if parts.len() >= 2
6669 && first_tables.contains(&parts[0].to_ascii_lowercase())
6670 {
6671 needs_rewrite = true;
6672 break;
6673 }
6674 }
6675 }
6676 }
6677
6678 if needs_rewrite {
6679 let mut new_select = s.clone();
6680 let mut new_from_exprs = vec![from.expressions[0].clone()];
6681 let mut new_joins = s.joins.clone();
6682
6683 for expr in from.expressions.iter().skip(1) {
6684 if let Expression::Table(ref t) = expr {
6685 if let Some(ref schema) = t.schema {
6686 if first_tables
6687 .contains(&schema.name.to_ascii_lowercase())
6688 {
6689 // This is an array path reference, convert to CROSS JOIN UNNEST
6690 let col_expr = Expression::Column(Box::new(
6691 crate::expressions::Column {
6692 name: t.name.clone(),
6693 table: Some(schema.clone()),
6694 join_mark: false,
6695 trailing_comments: vec![],
6696 span: None,
6697 inferred_type: None,
6698 },
6699 ));
6700 let unnest_expr = Expression::Unnest(Box::new(
6701 crate::expressions::UnnestFunc {
6702 this: col_expr,
6703 expressions: Vec::new(),
6704 with_ordinality: false,
6705 alias: None,
6706 offset_alias: None,
6707 },
6708 ));
6709 let join_this = if let Some(ref alias) = t.alias {
6710 if matches!(
6711 target,
6712 DialectType::Presto
6713 | DialectType::Trino
6714 | DialectType::Athena
6715 ) {
6716 // Presto: UNNEST(x) AS _t0(results)
6717 Expression::Alias(Box::new(
6718 crate::expressions::Alias {
6719 this: unnest_expr,
6720 alias: Identifier::new("_t0"),
6721 column_aliases: vec![alias.clone()],
6722 alias_explicit_as: false,
6723 alias_keyword: None,
6724 pre_alias_comments: vec![],
6725 trailing_comments: vec![],
6726 inferred_type: None,
6727 },
6728 ))
6729 } else {
6730 // BigQuery: UNNEST(x) AS results
6731 Expression::Alias(Box::new(
6732 crate::expressions::Alias {
6733 this: unnest_expr,
6734 alias: alias.clone(),
6735 column_aliases: vec![],
6736 alias_explicit_as: false,
6737 alias_keyword: None,
6738 pre_alias_comments: vec![],
6739 trailing_comments: vec![],
6740 inferred_type: None,
6741 },
6742 ))
6743 }
6744 } else {
6745 unnest_expr
6746 };
6747 new_joins.push(crate::expressions::Join {
6748 kind: crate::expressions::JoinKind::Cross,
6749 this: join_this,
6750 on: None,
6751 using: Vec::new(),
6752 use_inner_keyword: false,
6753 use_outer_keyword: false,
6754 deferred_condition: false,
6755 join_hint: None,
6756 match_condition: None,
6757 pivots: Vec::new(),
6758 comments: Vec::new(),
6759 nesting_group: 0,
6760 directed: false,
6761 });
6762 } else {
6763 new_from_exprs.push(expr.clone());
6764 }
6765 } else if t.schema.is_none() && t.name.name.contains('.') {
6766 // Dotted name in quoted identifier: `Coordinates.position`
6767 let parts: Vec<&str> = t.name.name.split('.').collect();
6768 if parts.len() >= 2
6769 && first_tables
6770 .contains(&parts[0].to_ascii_lowercase())
6771 {
6772 let join_this =
6773 if matches!(target, DialectType::BigQuery) {
6774 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
6775 Expression::Table(t.clone())
6776 } else {
6777 // Other targets: split into "schema"."name"
6778 let mut new_t = t.clone();
6779 new_t.schema =
6780 Some(Identifier::quoted(parts[0]));
6781 new_t.name = Identifier::quoted(parts[1]);
6782 Expression::Table(new_t)
6783 };
6784 new_joins.push(crate::expressions::Join {
6785 kind: crate::expressions::JoinKind::Cross,
6786 this: join_this,
6787 on: None,
6788 using: Vec::new(),
6789 use_inner_keyword: false,
6790 use_outer_keyword: false,
6791 deferred_condition: false,
6792 join_hint: None,
6793 match_condition: None,
6794 pivots: Vec::new(),
6795 comments: Vec::new(),
6796 nesting_group: 0,
6797 directed: false,
6798 });
6799 } else {
6800 new_from_exprs.push(expr.clone());
6801 }
6802 } else {
6803 new_from_exprs.push(expr.clone());
6804 }
6805 } else {
6806 new_from_exprs.push(expr.clone());
6807 }
6808 }
6809
6810 new_select.from = Some(crate::expressions::From {
6811 expressions: new_from_exprs,
6812 ..from.clone()
6813 });
6814 new_select.joins = new_joins;
6815 return Ok(Expression::Select(new_select));
6816 }
6817 }
6818 }
6819 }
6820 }
6821
6822 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
6823 if matches!(
6824 target,
6825 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6826 ) {
6827 if let Expression::Select(ref s) = e {
6828 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
6829 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
6830 matches!(expr, Expression::Unnest(_))
6831 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
6832 };
6833 let has_unnest_join = s.joins.iter().any(|j| {
6834 j.kind == crate::expressions::JoinKind::Cross && (
6835 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
6836 || is_unnest_or_explode_expr(&j.this)
6837 )
6838 });
6839 if has_unnest_join {
6840 let mut select = s.clone();
6841 let mut new_joins = Vec::new();
6842 for join in select.joins.drain(..) {
6843 if join.kind == crate::expressions::JoinKind::Cross {
6844 // Extract the UNNEST/EXPLODE from the join
6845 let (func_expr, table_alias, col_aliases) = match &join.this {
6846 Expression::Alias(a) => {
6847 let ta = if a.alias.is_empty() {
6848 None
6849 } else {
6850 Some(a.alias.clone())
6851 };
6852 let cas = a.column_aliases.clone();
6853 match &a.this {
6854 Expression::Unnest(u) => {
6855 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
6856 if !u.expressions.is_empty() {
6857 let mut all_args = vec![u.this.clone()];
6858 all_args.extend(u.expressions.clone());
6859 let arrays_zip =
6860 Expression::Function(Box::new(
6861 crate::expressions::Function::new(
6862 "ARRAYS_ZIP".to_string(),
6863 all_args,
6864 ),
6865 ));
6866 let inline = Expression::Function(Box::new(
6867 crate::expressions::Function::new(
6868 "INLINE".to_string(),
6869 vec![arrays_zip],
6870 ),
6871 ));
6872 (Some(inline), ta, a.column_aliases.clone())
6873 } else {
6874 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
6875 let func_name = if u.with_ordinality {
6876 "POSEXPLODE"
6877 } else {
6878 "EXPLODE"
6879 };
6880 let explode = Expression::Function(Box::new(
6881 crate::expressions::Function::new(
6882 func_name.to_string(),
6883 vec![u.this.clone()],
6884 ),
6885 ));
6886 // For POSEXPLODE, add 'pos' to column aliases
6887 let cas = if u.with_ordinality {
6888 let mut pos_aliases =
6889 vec![Identifier::new(
6890 "pos".to_string(),
6891 )];
6892 pos_aliases
6893 .extend(a.column_aliases.clone());
6894 pos_aliases
6895 } else {
6896 a.column_aliases.clone()
6897 };
6898 (Some(explode), ta, cas)
6899 }
6900 }
6901 Expression::Function(f)
6902 if f.name.eq_ignore_ascii_case("EXPLODE") =>
6903 {
6904 (Some(Expression::Function(f.clone())), ta, cas)
6905 }
6906 _ => (None, None, Vec::new()),
6907 }
6908 }
6909 Expression::Unnest(u) => {
6910 let func_name = if u.with_ordinality {
6911 "POSEXPLODE"
6912 } else {
6913 "EXPLODE"
6914 };
6915 let explode = Expression::Function(Box::new(
6916 crate::expressions::Function::new(
6917 func_name.to_string(),
6918 vec![u.this.clone()],
6919 ),
6920 ));
6921 let ta = u.alias.clone();
6922 let col_aliases = if u.with_ordinality {
6923 vec![Identifier::new("pos".to_string())]
6924 } else {
6925 Vec::new()
6926 };
6927 (Some(explode), ta, col_aliases)
6928 }
6929 _ => (None, None, Vec::new()),
6930 };
6931 if let Some(func) = func_expr {
6932 select.lateral_views.push(crate::expressions::LateralView {
6933 this: func,
6934 table_alias,
6935 column_aliases: col_aliases,
6936 outer: false,
6937 });
6938 } else {
6939 new_joins.push(join);
6940 }
6941 } else {
6942 new_joins.push(join);
6943 }
6944 }
6945 select.joins = new_joins;
6946 return Ok(Expression::Select(select));
6947 }
6948 }
6949 }
6950
6951 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
6952 // for BigQuery, Presto/Trino, Snowflake
6953 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
6954 && matches!(
6955 target,
6956 DialectType::BigQuery
6957 | DialectType::Presto
6958 | DialectType::Trino
6959 | DialectType::Snowflake
6960 )
6961 {
6962 if let Expression::Select(ref s) = e {
6963 // Check if any SELECT expressions contain UNNEST
6964 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
6965 let has_unnest_in_select = s.expressions.iter().any(|expr| {
6966 fn contains_unnest(e: &Expression) -> bool {
6967 match e {
6968 Expression::Unnest(_) => true,
6969 Expression::Function(f)
6970 if f.name.eq_ignore_ascii_case("UNNEST") =>
6971 {
6972 true
6973 }
6974 Expression::Alias(a) => contains_unnest(&a.this),
6975 Expression::Add(op)
6976 | Expression::Sub(op)
6977 | Expression::Mul(op)
6978 | Expression::Div(op) => {
6979 contains_unnest(&op.left) || contains_unnest(&op.right)
6980 }
6981 _ => false,
6982 }
6983 }
6984 contains_unnest(expr)
6985 });
6986
6987 if has_unnest_in_select {
6988 let rewritten = Self::rewrite_unnest_expansion(s, target);
6989 if let Some(new_select) = rewritten {
6990 return Ok(Expression::Select(Box::new(new_select)));
6991 }
6992 }
6993 }
6994 }
6995
6996 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
6997 // BigQuery '\n' -> PostgreSQL literal newline in string
6998 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
6999 {
7000 if let Expression::Literal(ref lit) = e {
7001 if let Literal::String(ref s) = lit.as_ref() {
7002 if s.contains("\\n")
7003 || s.contains("\\t")
7004 || s.contains("\\r")
7005 || s.contains("\\\\")
7006 {
7007 let converted = s
7008 .replace("\\n", "\n")
7009 .replace("\\t", "\t")
7010 .replace("\\r", "\r")
7011 .replace("\\\\", "\\");
7012 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
7013 }
7014 }
7015 }
7016 }
7017
7018 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
7019 // when source != target (identity tests keep the Literal::Timestamp for native handling)
7020 if source != target {
7021 if let Expression::Literal(ref lit) = e {
7022 if let Literal::Timestamp(ref s) = lit.as_ref() {
7023 let s = s.clone();
7024 // MySQL: TIMESTAMP handling depends on source dialect
7025 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
7026 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
7027 if matches!(target, DialectType::MySQL) {
7028 if matches!(source, DialectType::BigQuery) {
7029 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
7030 return Ok(Expression::Function(Box::new(Function::new(
7031 "TIMESTAMP".to_string(),
7032 vec![Expression::Literal(Box::new(Literal::String(s)))],
7033 ))));
7034 } else {
7035 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
7036 return Ok(Expression::Cast(Box::new(Cast {
7037 this: Expression::Literal(Box::new(Literal::String(s))),
7038 to: DataType::Custom {
7039 name: "DATETIME".to_string(),
7040 },
7041 trailing_comments: Vec::new(),
7042 double_colon_syntax: false,
7043 format: None,
7044 default: None,
7045 inferred_type: None,
7046 })));
7047 }
7048 }
7049 let dt = match target {
7050 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
7051 name: "DATETIME".to_string(),
7052 },
7053 DialectType::Snowflake => {
7054 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
7055 if matches!(source, DialectType::BigQuery) {
7056 DataType::Custom {
7057 name: "TIMESTAMPTZ".to_string(),
7058 }
7059 } else if matches!(
7060 source,
7061 DialectType::PostgreSQL
7062 | DialectType::Redshift
7063 | DialectType::Snowflake
7064 ) {
7065 DataType::Timestamp {
7066 precision: None,
7067 timezone: false,
7068 }
7069 } else {
7070 DataType::Custom {
7071 name: "TIMESTAMPNTZ".to_string(),
7072 }
7073 }
7074 }
7075 DialectType::Spark | DialectType::Databricks => {
7076 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
7077 if matches!(source, DialectType::BigQuery) {
7078 DataType::Timestamp {
7079 precision: None,
7080 timezone: false,
7081 }
7082 } else {
7083 DataType::Custom {
7084 name: "TIMESTAMP_NTZ".to_string(),
7085 }
7086 }
7087 }
7088 DialectType::ClickHouse => DataType::Nullable {
7089 inner: Box::new(DataType::Custom {
7090 name: "DateTime".to_string(),
7091 }),
7092 },
7093 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
7094 name: "DATETIME2".to_string(),
7095 },
7096 DialectType::DuckDB => {
7097 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
7098 // or when the timestamp string explicitly has timezone info
7099 if matches!(source, DialectType::BigQuery)
7100 || Self::timestamp_string_has_timezone(&s)
7101 {
7102 DataType::Custom {
7103 name: "TIMESTAMPTZ".to_string(),
7104 }
7105 } else {
7106 DataType::Timestamp {
7107 precision: None,
7108 timezone: false,
7109 }
7110 }
7111 }
7112 _ => DataType::Timestamp {
7113 precision: None,
7114 timezone: false,
7115 },
7116 };
7117 return Ok(Expression::Cast(Box::new(Cast {
7118 this: Expression::Literal(Box::new(Literal::String(s))),
7119 to: dt,
7120 trailing_comments: vec![],
7121 double_colon_syntax: false,
7122 format: None,
7123 default: None,
7124 inferred_type: None,
7125 })));
7126 }
7127 }
7128 }
7129
7130 // PostgreSQL DELETE requires explicit AS for table aliases
7131 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
7132 if let Expression::Delete(ref del) = e {
7133 if del.alias.is_some() && !del.alias_explicit_as {
7134 let mut new_del = del.clone();
7135 new_del.alias_explicit_as = true;
7136 return Ok(Expression::Delete(new_del));
7137 }
7138 }
7139 }
7140
7141 // UNION/INTERSECT/EXCEPT DISTINCT handling:
7142 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
7143 // while others don't support it (Presto, Spark, DuckDB, etc.)
7144 {
7145 let needs_distinct =
7146 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
7147 let drop_distinct = matches!(
7148 target,
7149 DialectType::Presto
7150 | DialectType::Trino
7151 | DialectType::Athena
7152 | DialectType::Spark
7153 | DialectType::Databricks
7154 | DialectType::DuckDB
7155 | DialectType::Hive
7156 | DialectType::MySQL
7157 | DialectType::PostgreSQL
7158 | DialectType::SQLite
7159 | DialectType::TSQL
7160 | DialectType::Redshift
7161 | DialectType::Snowflake
7162 | DialectType::Oracle
7163 | DialectType::Teradata
7164 | DialectType::Drill
7165 | DialectType::Doris
7166 | DialectType::StarRocks
7167 );
7168 match &e {
7169 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
7170 let mut new_u = (**u).clone();
7171 new_u.distinct = true;
7172 return Ok(Expression::Union(Box::new(new_u)));
7173 }
7174 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
7175 let mut new_i = (**i).clone();
7176 new_i.distinct = true;
7177 return Ok(Expression::Intersect(Box::new(new_i)));
7178 }
7179 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
7180 let mut new_ex = (**ex).clone();
7181 new_ex.distinct = true;
7182 return Ok(Expression::Except(Box::new(new_ex)));
7183 }
7184 Expression::Union(u) if u.distinct && drop_distinct => {
7185 let mut new_u = (**u).clone();
7186 new_u.distinct = false;
7187 return Ok(Expression::Union(Box::new(new_u)));
7188 }
7189 Expression::Intersect(i) if i.distinct && drop_distinct => {
7190 let mut new_i = (**i).clone();
7191 new_i.distinct = false;
7192 return Ok(Expression::Intersect(Box::new(new_i)));
7193 }
7194 Expression::Except(ex) if ex.distinct && drop_distinct => {
7195 let mut new_ex = (**ex).clone();
7196 new_ex.distinct = false;
7197 return Ok(Expression::Except(Box::new(new_ex)));
7198 }
7199 _ => {}
7200 }
7201 }
7202
7203 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
7204 if matches!(target, DialectType::ClickHouse) {
7205 if let Expression::Function(ref f) = e {
7206 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
7207 let mut new_f = f.as_ref().clone();
7208 new_f.name = "map".to_string();
7209 return Ok(Expression::Function(Box::new(new_f)));
7210 }
7211 }
7212 }
7213
7214 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
7215 if matches!(target, DialectType::ClickHouse) {
7216 if let Expression::Intersect(ref i) = e {
7217 if i.all {
7218 let mut new_i = (**i).clone();
7219 new_i.all = false;
7220 return Ok(Expression::Intersect(Box::new(new_i)));
7221 }
7222 }
7223 }
7224
7225 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
7226 // Only from Generic source, to prevent double-wrapping
7227 if matches!(source, DialectType::Generic) {
7228 if let Expression::Div(ref op) = e {
7229 let cast_type = match target {
7230 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
7231 precision: None,
7232 scale: None,
7233 real_spelling: false,
7234 }),
7235 DialectType::Drill
7236 | DialectType::Trino
7237 | DialectType::Athena
7238 | DialectType::Presto => Some(DataType::Double {
7239 precision: None,
7240 scale: None,
7241 }),
7242 DialectType::PostgreSQL
7243 | DialectType::Redshift
7244 | DialectType::Materialize
7245 | DialectType::Teradata
7246 | DialectType::RisingWave => Some(DataType::Double {
7247 precision: None,
7248 scale: None,
7249 }),
7250 _ => None,
7251 };
7252 if let Some(dt) = cast_type {
7253 let cast_left = Expression::Cast(Box::new(Cast {
7254 this: op.left.clone(),
7255 to: dt,
7256 double_colon_syntax: false,
7257 trailing_comments: Vec::new(),
7258 format: None,
7259 default: None,
7260 inferred_type: None,
7261 }));
7262 let new_op = crate::expressions::BinaryOp {
7263 left: cast_left,
7264 right: op.right.clone(),
7265 left_comments: op.left_comments.clone(),
7266 operator_comments: op.operator_comments.clone(),
7267 trailing_comments: op.trailing_comments.clone(),
7268 inferred_type: None,
7269 };
7270 return Ok(Expression::Div(Box::new(new_op)));
7271 }
7272 }
7273 }
7274
7275 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
7276 if matches!(target, DialectType::DuckDB) {
7277 if let Expression::CreateDatabase(db) = e {
7278 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
7279 schema.if_not_exists = db.if_not_exists;
7280 return Ok(Expression::CreateSchema(Box::new(schema)));
7281 }
7282 if let Expression::DropDatabase(db) = e {
7283 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
7284 schema.if_exists = db.if_exists;
7285 return Ok(Expression::DropSchema(Box::new(schema)));
7286 }
7287 }
7288
7289 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
7290 if matches!(source, DialectType::ClickHouse)
7291 && !matches!(target, DialectType::ClickHouse)
7292 {
7293 if let Expression::Cast(ref c) = e {
7294 if let DataType::Custom { ref name } = c.to {
7295 if name.len() >= 9
7296 && name[..9].eq_ignore_ascii_case("NULLABLE(")
7297 && name.ends_with(")")
7298 {
7299 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
7300 let inner_upper = inner.to_ascii_uppercase();
7301 let new_dt = match inner_upper.as_str() {
7302 "DATETIME" | "DATETIME64" => DataType::Timestamp {
7303 precision: None,
7304 timezone: false,
7305 },
7306 "DATE" => DataType::Date,
7307 "INT64" | "BIGINT" => DataType::BigInt { length: None },
7308 "INT32" | "INT" | "INTEGER" => DataType::Int {
7309 length: None,
7310 integer_spelling: false,
7311 },
7312 "FLOAT64" | "DOUBLE" => DataType::Double {
7313 precision: None,
7314 scale: None,
7315 },
7316 "STRING" => DataType::Text,
7317 _ => DataType::Custom {
7318 name: inner.to_string(),
7319 },
7320 };
7321 let mut new_cast = c.clone();
7322 new_cast.to = new_dt;
7323 return Ok(Expression::Cast(new_cast));
7324 }
7325 }
7326 }
7327 }
7328
7329 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
7330 if matches!(target, DialectType::Snowflake) {
7331 if let Expression::ArrayConcatAgg(ref agg) = e {
7332 let mut agg_clone = agg.as_ref().clone();
7333 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
7334 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
7335 let flatten = Expression::Function(Box::new(Function::new(
7336 "ARRAY_FLATTEN".to_string(),
7337 vec![array_agg],
7338 )));
7339 return Ok(flatten);
7340 }
7341 }
7342
7343 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
7344 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
7345 if let Expression::ArrayConcatAgg(agg) = e {
7346 let arg = agg.this;
7347 return Ok(Expression::Function(Box::new(Function::new(
7348 "ARRAY_CONCAT_AGG".to_string(),
7349 vec![arg],
7350 ))));
7351 }
7352 }
7353
7354 // Determine what action to take by inspecting e immutably
7355 let action = {
7356 let source_propagates_nulls =
7357 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
7358 let target_ignores_nulls =
7359 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
7360
7361 match &e {
7362 Expression::Function(f) => {
7363 let name = f.name.to_ascii_uppercase();
7364 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
7365 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
7366 if name == "JSON"
7367 && f.args.len() == 1
7368 && matches!(source, DialectType::DuckDB)
7369 && matches!(
7370 target,
7371 DialectType::Presto | DialectType::Trino | DialectType::Athena
7372 )
7373 {
7374 Action::DuckDBJsonFuncToJsonParse
7375 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
7376 // SQL:2016 `x IS JSON` predicate which has matching semantics.
7377 } else if name == "JSON_VALID"
7378 && f.args.len() == 1
7379 && matches!(source, DialectType::DuckDB)
7380 && matches!(
7381 target,
7382 DialectType::Presto | DialectType::Trino | DialectType::Athena
7383 )
7384 {
7385 Action::DuckDBJsonValidToIsJson
7386 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
7387 } else if (name == "DATE_PART" || name == "DATEPART")
7388 && f.args.len() == 2
7389 && matches!(target, DialectType::Snowflake)
7390 && !matches!(source, DialectType::Snowflake)
7391 && matches!(
7392 &f.args[0],
7393 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7394 )
7395 {
7396 Action::DatePartUnquote
7397 } else if source_propagates_nulls
7398 && target_ignores_nulls
7399 && (name == "GREATEST" || name == "LEAST")
7400 && f.args.len() >= 2
7401 {
7402 Action::GreatestLeastNull
7403 } else if matches!(source, DialectType::Snowflake)
7404 && name == "ARRAY_GENERATE_RANGE"
7405 && f.args.len() >= 2
7406 {
7407 Action::ArrayGenerateRange
7408 } else if matches!(source, DialectType::Snowflake)
7409 && matches!(target, DialectType::DuckDB)
7410 && name == "DATE_TRUNC"
7411 && f.args.len() == 2
7412 {
7413 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
7414 // Logic based on Python sqlglot's input_type_preserved flag:
7415 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
7416 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
7417 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
7418 let unit_str = match &f.args[0] {
7419 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
7420 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
7421 Some(s.to_ascii_uppercase())
7422 }
7423 _ => None,
7424 };
7425 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
7426 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
7427 });
7428 match &f.args[1] {
7429 Expression::Cast(c) => match &c.to {
7430 DataType::Time { .. } => Action::DateTruncWrapCast,
7431 DataType::Custom { name }
7432 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
7433 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
7434 {
7435 Action::DateTruncWrapCast
7436 }
7437 DataType::Timestamp { timezone: true, .. } => {
7438 Action::DateTruncWrapCast
7439 }
7440 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
7441 DataType::Timestamp {
7442 timezone: false, ..
7443 } if is_date_unit => Action::DateTruncWrapCast,
7444 _ => Action::None,
7445 },
7446 _ => Action::None,
7447 }
7448 } else if matches!(source, DialectType::Snowflake)
7449 && matches!(target, DialectType::DuckDB)
7450 && name == "TO_DATE"
7451 && f.args.len() == 1
7452 && !matches!(
7453 &f.args[0],
7454 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7455 )
7456 {
7457 Action::ToDateToCast
7458 } else if !matches!(source, DialectType::Redshift)
7459 && matches!(target, DialectType::Redshift)
7460 && name == "CONVERT_TIMEZONE"
7461 && (f.args.len() == 2 || f.args.len() == 3)
7462 {
7463 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
7464 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
7465 // The Redshift parser adds 'UTC' as default source_tz, but when
7466 // transpiling from other dialects, we should preserve the original form.
7467 Action::ConvertTimezoneToExpr
7468 } else if matches!(source, DialectType::Snowflake)
7469 && matches!(target, DialectType::DuckDB)
7470 && name == "REGEXP_REPLACE"
7471 && f.args.len() == 4
7472 && !matches!(
7473 &f.args[3],
7474 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7475 )
7476 {
7477 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
7478 Action::RegexpReplaceSnowflakeToDuckDB
7479 } else if matches!(source, DialectType::Snowflake)
7480 && matches!(target, DialectType::DuckDB)
7481 && name == "REGEXP_REPLACE"
7482 && f.args.len() == 5
7483 {
7484 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
7485 Action::RegexpReplacePositionSnowflakeToDuckDB
7486 } else if matches!(source, DialectType::Snowflake)
7487 && matches!(target, DialectType::DuckDB)
7488 && name == "REGEXP_SUBSTR"
7489 {
7490 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
7491 Action::RegexpSubstrSnowflakeToDuckDB
7492 } else if matches!(source, DialectType::Snowflake)
7493 && matches!(target, DialectType::Snowflake)
7494 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
7495 && f.args.len() == 6
7496 {
7497 // Snowflake identity: strip trailing group=0
7498 Action::RegexpSubstrSnowflakeIdentity
7499 } else if matches!(source, DialectType::Snowflake)
7500 && matches!(target, DialectType::DuckDB)
7501 && name == "REGEXP_SUBSTR_ALL"
7502 {
7503 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
7504 Action::RegexpSubstrAllSnowflakeToDuckDB
7505 } else if matches!(source, DialectType::Snowflake)
7506 && matches!(target, DialectType::DuckDB)
7507 && name == "REGEXP_COUNT"
7508 {
7509 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
7510 Action::RegexpCountSnowflakeToDuckDB
7511 } else if matches!(source, DialectType::Snowflake)
7512 && matches!(target, DialectType::DuckDB)
7513 && name == "REGEXP_INSTR"
7514 {
7515 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
7516 Action::RegexpInstrSnowflakeToDuckDB
7517 } else if matches!(source, DialectType::BigQuery)
7518 && matches!(target, DialectType::Snowflake)
7519 && name == "REGEXP_EXTRACT_ALL"
7520 {
7521 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
7522 Action::RegexpExtractAllToSnowflake
7523 } else if name == "_BQ_TO_HEX" {
7524 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
7525 Action::BigQueryToHexBare
7526 } else if matches!(source, DialectType::BigQuery)
7527 && !matches!(target, DialectType::BigQuery)
7528 {
7529 // BigQuery-specific functions that need to be converted to standard forms
7530 match name.as_str() {
7531 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
7532 | "DATE_DIFF"
7533 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
7534 | "DATETIME_ADD" | "DATETIME_SUB"
7535 | "TIME_ADD" | "TIME_SUB"
7536 | "DATE_ADD" | "DATE_SUB"
7537 | "SAFE_DIVIDE"
7538 | "GENERATE_UUID"
7539 | "COUNTIF"
7540 | "EDIT_DISTANCE"
7541 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
7542 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
7543 | "TO_HEX"
7544 | "TO_JSON_STRING"
7545 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
7546 | "DIV"
7547 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
7548 | "LAST_DAY"
7549 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
7550 | "REGEXP_CONTAINS"
7551 | "CONTAINS_SUBSTR"
7552 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
7553 | "SAFE_CAST"
7554 | "GENERATE_DATE_ARRAY"
7555 | "PARSE_DATE" | "PARSE_TIMESTAMP"
7556 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
7557 | "ARRAY_CONCAT"
7558 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
7559 | "INSTR"
7560 | "MD5" | "SHA1" | "SHA256" | "SHA512"
7561 | "GENERATE_UUID()" // just in case
7562 | "REGEXP_EXTRACT_ALL"
7563 | "REGEXP_EXTRACT"
7564 | "INT64"
7565 | "ARRAY_CONCAT_AGG"
7566 | "DATE_DIFF(" // just in case
7567 | "TO_HEX_MD5" // internal
7568 | "MOD"
7569 | "CONCAT"
7570 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
7571 | "STRUCT"
7572 | "ROUND"
7573 | "MAKE_INTERVAL"
7574 | "ARRAY_TO_STRING"
7575 | "PERCENTILE_CONT"
7576 => Action::BigQueryFunctionNormalize,
7577 "ARRAY" if matches!(target, DialectType::Snowflake)
7578 && f.args.len() == 1
7579 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
7580 => Action::BigQueryArraySelectAsStructToSnowflake,
7581 _ => Action::None,
7582 }
7583 } else if matches!(source, DialectType::BigQuery)
7584 && matches!(target, DialectType::BigQuery)
7585 {
7586 // BigQuery -> BigQuery normalizations
7587 match name.as_str() {
7588 "TIMESTAMP_DIFF"
7589 | "DATETIME_DIFF"
7590 | "TIME_DIFF"
7591 | "DATE_DIFF"
7592 | "DATE_ADD"
7593 | "TO_HEX"
7594 | "CURRENT_TIMESTAMP"
7595 | "CURRENT_DATE"
7596 | "CURRENT_TIME"
7597 | "CURRENT_DATETIME"
7598 | "GENERATE_DATE_ARRAY"
7599 | "INSTR"
7600 | "FORMAT_DATETIME"
7601 | "DATETIME"
7602 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
7603 _ => Action::None,
7604 }
7605 } else {
7606 // Generic function normalization for non-BigQuery sources
7607 match name.as_str() {
7608 "ARBITRARY" | "AGGREGATE"
7609 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
7610 | "STRUCT_EXTRACT"
7611 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
7612 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
7613 | "SUBSTRINGINDEX"
7614 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
7615 | "UNICODE"
7616 | "XOR"
7617 | "ARRAY_REVERSE_SORT"
7618 | "ENCODE" | "DECODE"
7619 | "QUANTILE"
7620 | "EPOCH" | "EPOCH_MS"
7621 | "HASHBYTES"
7622 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
7623 | "APPROX_DISTINCT"
7624 | "DATE_PARSE" | "FORMAT_DATETIME"
7625 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
7626 | "RLIKE"
7627 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
7628 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
7629 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
7630 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
7631 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
7632 | "MAP" | "MAP_FROM_ENTRIES"
7633 | "COLLECT_LIST" | "COLLECT_SET"
7634 | "ISNAN" | "IS_NAN"
7635 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
7636 | "FORMAT_NUMBER"
7637 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
7638 | "ELEMENT_AT"
7639 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
7640 | "SPLIT_PART"
7641 // GENERATE_SERIES: handled separately below
7642 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
7643 | "JSON_QUERY" | "JSON_VALUE"
7644 | "JSON_SEARCH"
7645 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
7646 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
7647 | "CURDATE" | "CURTIME"
7648 | "ARRAY_TO_STRING"
7649 | "ARRAY_SORT" | "SORT_ARRAY"
7650 | "LEFT" | "RIGHT"
7651 | "MAP_FROM_ARRAYS"
7652 | "LIKE" | "ILIKE"
7653 | "ARRAY_CONCAT" | "LIST_CONCAT"
7654 | "QUANTILE_CONT" | "QUANTILE_DISC"
7655 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
7656 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
7657 | "LOCATE" | "STRPOS" | "INSTR"
7658 | "CHAR"
7659 // CONCAT: handled separately for COALESCE wrapping
7660 | "ARRAY_JOIN"
7661 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
7662 | "ISNULL"
7663 | "MONTHNAME"
7664 | "TO_TIMESTAMP"
7665 | "TO_DATE"
7666 | "TO_JSON"
7667 | "REGEXP_SPLIT"
7668 | "SPLIT"
7669 | "FORMATDATETIME"
7670 | "ARRAYJOIN"
7671 | "SPLITBYSTRING" | "SPLITBYREGEXP"
7672 | "NVL"
7673 | "TO_CHAR"
7674 | "DBMS_RANDOM.VALUE"
7675 | "REGEXP_LIKE"
7676 | "REPLICATE"
7677 | "LEN"
7678 | "COUNT_BIG"
7679 | "DATEFROMPARTS"
7680 | "DATETIMEFROMPARTS"
7681 | "CONVERT" | "TRY_CONVERT"
7682 | "STRFTIME" | "STRPTIME"
7683 | "DATE_FORMAT" | "FORMAT_DATE"
7684 | "PARSE_TIMESTAMP" | "PARSE_DATE"
7685 | "FROM_BASE64" | "TO_BASE64"
7686 | "GETDATE"
7687 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
7688 | "TO_UTF8" | "FROM_UTF8"
7689 | "STARTS_WITH" | "STARTSWITH"
7690 | "APPROX_COUNT_DISTINCT"
7691 | "JSON_FORMAT"
7692 | "SYSDATE"
7693 | "LOGICAL_OR" | "LOGICAL_AND"
7694 | "MONTHS_ADD"
7695 | "SCHEMA_NAME"
7696 | "STRTOL"
7697 | "EDITDIST3"
7698 | "FORMAT"
7699 | "LIST_CONTAINS" | "LIST_HAS"
7700 | "VARIANCE" | "STDDEV"
7701 | "ISINF"
7702 | "TO_UNIXTIME"
7703 | "FROM_UNIXTIME"
7704 | "DATEPART" | "DATE_PART"
7705 | "DATENAME"
7706 | "STRING_AGG"
7707 | "JSON_ARRAYAGG"
7708 | "APPROX_QUANTILE"
7709 | "MAKE_DATE"
7710 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
7711 | "RANGE"
7712 | "TRY_ELEMENT_AT"
7713 | "STR_TO_MAP"
7714 | "STRING"
7715 | "STR_TO_TIME"
7716 | "CURRENT_SCHEMA"
7717 | "LTRIM" | "RTRIM"
7718 | "UUID"
7719 | "FARM_FINGERPRINT"
7720 | "JSON_KEYS"
7721 | "WEEKOFYEAR"
7722 | "CONCAT_WS"
7723 | "TRY_DIVIDE"
7724 | "ARRAY_SLICE"
7725 | "ARRAY_PREPEND"
7726 | "ARRAY_REMOVE"
7727 | "GENERATE_DATE_ARRAY"
7728 | "PARSE_JSON"
7729 | "JSON_REMOVE"
7730 | "JSON_SET"
7731 | "LEVENSHTEIN"
7732 | "CURRENT_VERSION"
7733 | "ARRAY_MAX"
7734 | "ARRAY_MIN"
7735 | "JAROWINKLER_SIMILARITY"
7736 | "CURRENT_SCHEMAS"
7737 | "TO_VARIANT"
7738 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
7739 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
7740 => Action::GenericFunctionNormalize,
7741 // Canonical date functions -> dialect-specific
7742 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
7743 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
7744 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
7745 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
7746 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
7747 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
7748 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
7749 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
7750 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
7751 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
7752 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
7753 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
7754 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
7755 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
7756 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
7757 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
7758 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
7759 // STR_TO_DATE(x, fmt) -> dialect-specific
7760 "STR_TO_DATE" if f.args.len() == 2
7761 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
7762 "STR_TO_DATE" => Action::GenericFunctionNormalize,
7763 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
7764 "TS_OR_DS_ADD" if f.args.len() == 3
7765 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
7766 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
7767 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
7768 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
7769 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
7770 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
7771 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
7772 // IS_ASCII(x) -> dialect-specific
7773 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
7774 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
7775 "STR_POSITION" => Action::StrPositionConvert,
7776 // ARRAY_SUM -> dialect-specific
7777 "ARRAY_SUM" => Action::ArraySumConvert,
7778 // ARRAY_SIZE -> dialect-specific (Drill only)
7779 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
7780 // ARRAY_ANY -> dialect-specific
7781 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
7782 // Functions needing specific cross-dialect transforms
7783 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
7784 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
7785 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
7786 "ARRAY" if matches!(source, DialectType::BigQuery)
7787 && matches!(target, DialectType::Snowflake)
7788 && f.args.len() == 1
7789 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
7790 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
7791 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
7792 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
7793 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
7794 "DATE_TRUNC" if f.args.len() == 2
7795 && matches!(source, DialectType::Generic)
7796 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
7797 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
7798 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
7799 "TIMESTAMP_TRUNC" if f.args.len() >= 2
7800 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
7801 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
7802 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
7803 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7804 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
7805 // GENERATE_SERIES with interval normalization for PG target
7806 "GENERATE_SERIES" if f.args.len() >= 3
7807 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7808 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
7809 "GENERATE_SERIES" => Action::None, // passthrough for other cases
7810 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
7811 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7812 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
7813 "CONCAT" => Action::GenericFunctionNormalize,
7814 // DIV(a, b) -> target-specific integer division
7815 "DIV" if f.args.len() == 2
7816 && matches!(source, DialectType::PostgreSQL)
7817 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
7818 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
7819 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
7820 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
7821 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
7822 "JSONB_EXISTS" if f.args.len() == 2
7823 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
7824 // DATE_BIN -> TIME_BUCKET for DuckDB
7825 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
7826 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
7827 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
7828 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
7829 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
7830 // ClickHouse any -> ANY_VALUE for other dialects
7831 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
7832 _ => Action::None,
7833 }
7834 }
7835 }
7836 Expression::AggregateFunction(af) => {
7837 let name = af.name.to_ascii_uppercase();
7838 match name.as_str() {
7839 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
7840 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
7841 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
7842 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
7843 if matches!(target, DialectType::DuckDB) =>
7844 {
7845 Action::JsonObjectAggConvert
7846 }
7847 "ARRAY_AGG"
7848 if matches!(
7849 target,
7850 DialectType::Hive
7851 | DialectType::Spark
7852 | DialectType::Databricks
7853 ) =>
7854 {
7855 Action::ArrayAggToCollectList
7856 }
7857 "MAX_BY" | "MIN_BY"
7858 if matches!(
7859 target,
7860 DialectType::ClickHouse
7861 | DialectType::Spark
7862 | DialectType::Databricks
7863 | DialectType::DuckDB
7864 ) =>
7865 {
7866 Action::MaxByMinByConvert
7867 }
7868 "COLLECT_LIST"
7869 if matches!(
7870 target,
7871 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
7872 ) =>
7873 {
7874 Action::CollectListToArrayAgg
7875 }
7876 "COLLECT_SET"
7877 if matches!(
7878 target,
7879 DialectType::Presto
7880 | DialectType::Trino
7881 | DialectType::Snowflake
7882 | DialectType::DuckDB
7883 ) =>
7884 {
7885 Action::CollectSetConvert
7886 }
7887 "PERCENTILE"
7888 if matches!(
7889 target,
7890 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
7891 ) =>
7892 {
7893 Action::PercentileConvert
7894 }
7895 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
7896 "CORR"
7897 if matches!(target, DialectType::DuckDB)
7898 && matches!(source, DialectType::Snowflake) =>
7899 {
7900 Action::CorrIsnanWrap
7901 }
7902 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
7903 "APPROX_QUANTILES"
7904 if matches!(source, DialectType::BigQuery)
7905 && matches!(target, DialectType::DuckDB) =>
7906 {
7907 Action::BigQueryApproxQuantiles
7908 }
7909 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
7910 "PERCENTILE_CONT"
7911 if matches!(source, DialectType::BigQuery)
7912 && matches!(target, DialectType::DuckDB)
7913 && af.args.len() >= 2 =>
7914 {
7915 Action::BigQueryPercentileContToDuckDB
7916 }
7917 _ => Action::None,
7918 }
7919 }
7920 Expression::JSONArrayAgg(_) => match target {
7921 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
7922 _ => Action::None,
7923 },
7924 Expression::ToNumber(tn) => {
7925 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
7926 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
7927 match target {
7928 DialectType::Oracle
7929 | DialectType::Snowflake
7930 | DialectType::Teradata => Action::None,
7931 _ => Action::GenericFunctionNormalize,
7932 }
7933 } else {
7934 Action::None
7935 }
7936 }
7937 Expression::Nvl2(_) => {
7938 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
7939 // Keep as NVL2 for dialects that support it natively
7940 match target {
7941 DialectType::Oracle
7942 | DialectType::Snowflake
7943 | DialectType::Teradata
7944 | DialectType::Spark
7945 | DialectType::Databricks
7946 | DialectType::Redshift => Action::None,
7947 _ => Action::Nvl2Expand,
7948 }
7949 }
7950 Expression::Decode(_) | Expression::DecodeCase(_) => {
7951 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
7952 // Keep as DECODE for Oracle/Snowflake
7953 match target {
7954 DialectType::Oracle | DialectType::Snowflake => Action::None,
7955 _ => Action::DecodeSimplify,
7956 }
7957 }
7958 Expression::Coalesce(ref cf) => {
7959 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
7960 // BigQuery keeps IFNULL natively when source is also BigQuery
7961 if cf.original_name.as_deref() == Some("IFNULL")
7962 && !(matches!(source, DialectType::BigQuery)
7963 && matches!(target, DialectType::BigQuery))
7964 {
7965 Action::IfnullToCoalesce
7966 } else {
7967 Action::None
7968 }
7969 }
7970 Expression::IfFunc(if_func) => {
7971 if matches!(source, DialectType::Snowflake)
7972 && matches!(
7973 target,
7974 DialectType::Presto | DialectType::Trino | DialectType::SQLite
7975 )
7976 && matches!(if_func.false_value, Some(Expression::Div(_)))
7977 {
7978 Action::Div0TypedDivision
7979 } else {
7980 Action::None
7981 }
7982 }
7983 Expression::ToJson(_) => match target {
7984 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
7985 DialectType::BigQuery => Action::ToJsonConvert,
7986 DialectType::DuckDB => Action::ToJsonConvert,
7987 _ => Action::None,
7988 },
7989 Expression::ArrayAgg(ref agg) => {
7990 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
7991 Action::ArrayAggToGroupConcat
7992 } else if matches!(
7993 target,
7994 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7995 ) {
7996 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
7997 Action::ArrayAggToCollectList
7998 } else if matches!(
7999 source,
8000 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8001 ) && matches!(target, DialectType::DuckDB)
8002 && agg.filter.is_some()
8003 {
8004 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
8005 // Need to add NOT x IS NULL to existing filter
8006 Action::ArrayAggNullFilter
8007 } else if matches!(target, DialectType::DuckDB)
8008 && agg.ignore_nulls == Some(true)
8009 && !agg.order_by.is_empty()
8010 {
8011 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
8012 Action::ArrayAggIgnoreNullsDuckDB
8013 } else if !matches!(source, DialectType::Snowflake) {
8014 Action::None
8015 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
8016 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
8017 || agg.name.is_none();
8018 if is_array_agg {
8019 Action::ArrayAggCollectList
8020 } else {
8021 Action::None
8022 }
8023 } else if matches!(
8024 target,
8025 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
8026 ) && agg.filter.is_none()
8027 {
8028 Action::ArrayAggFilter
8029 } else {
8030 Action::None
8031 }
8032 }
8033 Expression::WithinGroup(wg) => {
8034 if matches!(source, DialectType::Snowflake)
8035 && matches!(
8036 target,
8037 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
8038 )
8039 && matches!(wg.this, Expression::ArrayAgg(_))
8040 {
8041 Action::ArrayAggWithinGroupFilter
8042 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
8043 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
8044 || matches!(&wg.this, Expression::StringAgg(_))
8045 {
8046 Action::StringAggConvert
8047 } else if matches!(
8048 target,
8049 DialectType::Presto
8050 | DialectType::Trino
8051 | DialectType::Athena
8052 | DialectType::Spark
8053 | DialectType::Databricks
8054 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
8055 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
8056 || matches!(&wg.this, Expression::PercentileCont(_)))
8057 {
8058 Action::PercentileContConvert
8059 } else {
8060 Action::None
8061 }
8062 }
8063 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
8064 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
8065 // DATETIME is the timezone-unaware type
8066 Expression::Cast(ref c) => {
8067 if c.format.is_some()
8068 && (matches!(source, DialectType::BigQuery)
8069 || matches!(source, DialectType::Teradata))
8070 {
8071 Action::BigQueryCastFormat
8072 } else if matches!(target, DialectType::BigQuery)
8073 && !matches!(source, DialectType::BigQuery)
8074 && matches!(
8075 c.to,
8076 DataType::Timestamp {
8077 timezone: false,
8078 ..
8079 }
8080 )
8081 {
8082 Action::CastTimestampToDatetime
8083 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
8084 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
8085 && matches!(
8086 c.to,
8087 DataType::Timestamp {
8088 timezone: false,
8089 ..
8090 }
8091 )
8092 {
8093 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
8094 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
8095 Action::CastTimestampToDatetime
8096 } else if matches!(
8097 source,
8098 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8099 ) && matches!(
8100 target,
8101 DialectType::Presto
8102 | DialectType::Trino
8103 | DialectType::Athena
8104 | DialectType::DuckDB
8105 | DialectType::Snowflake
8106 | DialectType::BigQuery
8107 | DialectType::Databricks
8108 | DialectType::TSQL
8109 ) {
8110 Action::HiveCastToTryCast
8111 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
8112 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
8113 {
8114 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
8115 Action::CastTimestamptzToFunc
8116 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
8117 && matches!(
8118 target,
8119 DialectType::Hive
8120 | DialectType::Spark
8121 | DialectType::Databricks
8122 | DialectType::BigQuery
8123 )
8124 {
8125 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/Databricks/BigQuery
8126 Action::CastTimestampStripTz
8127 } else if matches!(&c.to, DataType::Json)
8128 && matches!(source, DialectType::DuckDB)
8129 && matches!(target, DialectType::Snowflake)
8130 {
8131 Action::DuckDBCastJsonToVariant
8132 } else if matches!(&c.to, DataType::Json)
8133 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
8134 && matches!(
8135 target,
8136 DialectType::Presto
8137 | DialectType::Trino
8138 | DialectType::Athena
8139 | DialectType::Snowflake
8140 )
8141 {
8142 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
8143 // Only when the input is a string literal (JSON 'value' syntax)
8144 Action::JsonLiteralToJsonParse
8145 } else if matches!(&c.to, DataType::Json)
8146 && matches!(source, DialectType::DuckDB)
8147 && matches!(
8148 target,
8149 DialectType::Presto | DialectType::Trino | DialectType::Athena
8150 )
8151 {
8152 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
8153 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
8154 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
8155 // in the target to preserve DuckDB's parse semantics.
8156 Action::JsonLiteralToJsonParse
8157 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
8158 && matches!(target, DialectType::Spark | DialectType::Databricks)
8159 {
8160 // CAST(x AS JSON) -> TO_JSON(x) for Spark
8161 Action::CastToJsonForSpark
8162 } else if (matches!(
8163 &c.to,
8164 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
8165 )) && matches!(
8166 target,
8167 DialectType::Spark | DialectType::Databricks
8168 ) && (matches!(&c.this, Expression::ParseJson(_))
8169 || matches!(
8170 &c.this,
8171 Expression::Function(f)
8172 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
8173 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
8174 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
8175 ))
8176 {
8177 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
8178 // -> FROM_JSON(..., type_string) for Spark
8179 Action::CastJsonToFromJson
8180 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
8181 && matches!(
8182 c.to,
8183 DataType::Timestamp {
8184 timezone: false,
8185 ..
8186 }
8187 )
8188 && matches!(source, DialectType::DuckDB)
8189 {
8190 Action::StrftimeCastTimestamp
8191 } else if matches!(source, DialectType::DuckDB)
8192 && matches!(
8193 c.to,
8194 DataType::Decimal {
8195 precision: None,
8196 ..
8197 }
8198 )
8199 {
8200 Action::DecimalDefaultPrecision
8201 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
8202 && matches!(c.to, DataType::Char { length: None })
8203 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
8204 {
8205 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
8206 Action::MysqlCastCharToText
8207 } else if matches!(
8208 source,
8209 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8210 ) && matches!(
8211 target,
8212 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8213 ) && Self::has_varchar_char_type(&c.to)
8214 {
8215 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
8216 Action::SparkCastVarcharToString
8217 } else {
8218 Action::None
8219 }
8220 }
8221 Expression::SafeCast(ref c) => {
8222 if c.format.is_some()
8223 && matches!(source, DialectType::BigQuery)
8224 && !matches!(target, DialectType::BigQuery)
8225 {
8226 Action::BigQueryCastFormat
8227 } else {
8228 Action::None
8229 }
8230 }
8231 Expression::TryCast(ref c) => {
8232 if matches!(&c.to, DataType::Json)
8233 && matches!(source, DialectType::DuckDB)
8234 && matches!(
8235 target,
8236 DialectType::Presto | DialectType::Trino | DialectType::Athena
8237 )
8238 {
8239 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
8240 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
8241 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
8242 // to preserve DuckDB's parse-or-null semantics.
8243 Action::DuckDBTryCastJsonToTryJsonParse
8244 } else {
8245 Action::None
8246 }
8247 }
8248 Expression::JSONArray(ref ja)
8249 if matches!(target, DialectType::Snowflake)
8250 && ja.null_handling.is_none()
8251 && ja.return_type.is_none()
8252 && ja.strict.is_none() =>
8253 {
8254 Action::GenericFunctionNormalize
8255 }
8256 Expression::JsonArray(_) if matches!(target, DialectType::Snowflake) => {
8257 Action::GenericFunctionNormalize
8258 }
8259 // For DuckDB: DATE_TRUNC should preserve the input type
8260 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
8261 if matches!(source, DialectType::Snowflake)
8262 && matches!(target, DialectType::DuckDB)
8263 {
8264 Action::DateTruncWrapCast
8265 } else {
8266 Action::None
8267 }
8268 }
8269 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
8270 Expression::SetStatement(s) => {
8271 if matches!(target, DialectType::DuckDB)
8272 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
8273 && s.items.iter().any(|item| item.kind.is_none())
8274 {
8275 Action::SetToVariable
8276 } else {
8277 Action::None
8278 }
8279 }
8280 // Cross-dialect NULL ordering normalization.
8281 // When nulls_first is not specified, fill in the source dialect's implied
8282 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
8283 Expression::Ordered(o) => {
8284 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
8285 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
8286 Action::MysqlNullsOrdering
8287 } else {
8288 // Skip targets that don't support NULLS FIRST/LAST syntax
8289 let target_supports_nulls = !matches!(
8290 target,
8291 DialectType::MySQL
8292 | DialectType::TSQL
8293 | DialectType::StarRocks
8294 | DialectType::Doris
8295 );
8296 if o.nulls_first.is_none() && source != target && target_supports_nulls
8297 {
8298 Action::NullsOrdering
8299 } else {
8300 Action::None
8301 }
8302 }
8303 }
8304 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
8305 Expression::DataType(dt) => {
8306 if matches!(source, DialectType::BigQuery)
8307 && !matches!(target, DialectType::BigQuery)
8308 {
8309 match dt {
8310 DataType::Custom { ref name }
8311 if name.eq_ignore_ascii_case("INT64")
8312 || name.eq_ignore_ascii_case("FLOAT64")
8313 || name.eq_ignore_ascii_case("BOOL")
8314 || name.eq_ignore_ascii_case("BYTES")
8315 || name.eq_ignore_ascii_case("NUMERIC")
8316 || name.eq_ignore_ascii_case("STRING")
8317 || name.eq_ignore_ascii_case("DATETIME") =>
8318 {
8319 Action::BigQueryCastType
8320 }
8321 _ => Action::None,
8322 }
8323 } else if matches!(source, DialectType::TSQL) {
8324 // For TSQL source -> any target (including TSQL itself for REAL)
8325 match dt {
8326 // REAL -> FLOAT even for TSQL->TSQL
8327 DataType::Custom { ref name }
8328 if name.eq_ignore_ascii_case("REAL") =>
8329 {
8330 Action::TSQLTypeNormalize
8331 }
8332 DataType::Float {
8333 real_spelling: true,
8334 ..
8335 } => Action::TSQLTypeNormalize,
8336 // Other TSQL type normalizations only for non-TSQL targets
8337 DataType::Custom { ref name }
8338 if !matches!(target, DialectType::TSQL)
8339 && (name.eq_ignore_ascii_case("MONEY")
8340 || name.eq_ignore_ascii_case("SMALLMONEY")
8341 || name.eq_ignore_ascii_case("DATETIME2")
8342 || name.eq_ignore_ascii_case("IMAGE")
8343 || name.eq_ignore_ascii_case("BIT")
8344 || name.eq_ignore_ascii_case("ROWVERSION")
8345 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
8346 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
8347 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
8348 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
8349 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
8350 {
8351 Action::TSQLTypeNormalize
8352 }
8353 DataType::Float {
8354 precision: Some(_), ..
8355 } if !matches!(target, DialectType::TSQL) => {
8356 Action::TSQLTypeNormalize
8357 }
8358 DataType::TinyInt { .. }
8359 if !matches!(target, DialectType::TSQL) =>
8360 {
8361 Action::TSQLTypeNormalize
8362 }
8363 // INTEGER -> INT for Databricks/Spark targets
8364 DataType::Int {
8365 integer_spelling: true,
8366 ..
8367 } if matches!(
8368 target,
8369 DialectType::Databricks | DialectType::Spark
8370 ) =>
8371 {
8372 Action::TSQLTypeNormalize
8373 }
8374 _ => Action::None,
8375 }
8376 } else if (matches!(source, DialectType::Oracle)
8377 || matches!(source, DialectType::Generic))
8378 && !matches!(target, DialectType::Oracle)
8379 {
8380 match dt {
8381 DataType::Custom { ref name }
8382 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
8383 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
8384 || name.eq_ignore_ascii_case("VARCHAR2")
8385 || name.eq_ignore_ascii_case("NVARCHAR2") =>
8386 {
8387 Action::OracleVarchar2ToVarchar
8388 }
8389 _ => Action::None,
8390 }
8391 } else if matches!(target, DialectType::Snowflake)
8392 && !matches!(source, DialectType::Snowflake)
8393 {
8394 // When target is Snowflake but source is NOT Snowflake,
8395 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
8396 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
8397 // should keep their FLOAT spelling.
8398 match dt {
8399 DataType::Float { .. } => Action::SnowflakeFloatProtect,
8400 _ => Action::None,
8401 }
8402 } else {
8403 Action::None
8404 }
8405 }
8406 // LOWER patterns from BigQuery TO_HEX conversions:
8407 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
8408 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
8409 Expression::Lower(uf) => {
8410 if matches!(source, DialectType::BigQuery) {
8411 match &uf.this {
8412 Expression::Lower(_) => Action::BigQueryToHexLower,
8413 Expression::Function(f)
8414 if f.name == "TO_HEX"
8415 && matches!(target, DialectType::BigQuery) =>
8416 {
8417 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
8418 Action::BigQueryToHexLower
8419 }
8420 _ => Action::None,
8421 }
8422 } else {
8423 Action::None
8424 }
8425 }
8426 // UPPER patterns from BigQuery TO_HEX conversions:
8427 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
8428 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
8429 Expression::Upper(uf) => {
8430 if matches!(source, DialectType::BigQuery) {
8431 match &uf.this {
8432 Expression::Lower(_) => Action::BigQueryToHexUpper,
8433 _ => Action::None,
8434 }
8435 } else {
8436 Action::None
8437 }
8438 }
8439 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
8440 // Snowflake supports LAST_DAY with unit, so keep it there
8441 Expression::LastDay(ld) => {
8442 if matches!(source, DialectType::BigQuery)
8443 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
8444 && ld.unit.is_some()
8445 {
8446 Action::BigQueryLastDayStripUnit
8447 } else {
8448 Action::None
8449 }
8450 }
8451 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
8452 Expression::SafeDivide(_) => {
8453 if matches!(source, DialectType::BigQuery)
8454 && !matches!(target, DialectType::BigQuery)
8455 {
8456 Action::BigQuerySafeDivide
8457 } else {
8458 Action::None
8459 }
8460 }
8461 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
8462 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
8463 Expression::AnyValue(ref agg) => {
8464 if matches!(source, DialectType::BigQuery)
8465 && matches!(target, DialectType::DuckDB)
8466 && agg.having_max.is_some()
8467 {
8468 Action::BigQueryAnyValueHaving
8469 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
8470 && !matches!(source, DialectType::Spark | DialectType::Databricks)
8471 && agg.ignore_nulls.is_none()
8472 {
8473 Action::AnyValueIgnoreNulls
8474 } else {
8475 Action::None
8476 }
8477 }
8478 Expression::Any(ref q) => {
8479 if matches!(source, DialectType::PostgreSQL)
8480 && matches!(
8481 target,
8482 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8483 )
8484 && q.op.is_some()
8485 && !matches!(
8486 q.subquery,
8487 Expression::Select(_) | Expression::Subquery(_)
8488 )
8489 {
8490 Action::AnyToExists
8491 } else {
8492 Action::None
8493 }
8494 }
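8495 // NOTE(review): the BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) rewrite for DuckDB is not handled by the RegexpLike arms below; this line looks like a leftover.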
8496 // Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
8497 Expression::RegexpLike(_)
8498 if matches!(source, DialectType::Snowflake)
8499 && matches!(target, DialectType::DuckDB) =>
8500 {
8501 Action::RlikeSnowflakeToDuckDB
8502 }
8503 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
8504 Expression::RegexpLike(_)
8505 if !matches!(source, DialectType::DuckDB)
8506 && matches!(target, DialectType::DuckDB) =>
8507 {
8508 Action::RegexpLikeToDuckDB
8509 }
8510 // RegexpLike -> Exasol: anchor pattern with .*...*
8511 Expression::RegexpLike(_)
8512 if matches!(target, DialectType::Exasol) =>
8513 {
8514 Action::RegexpLikeExasolAnchor
8515 }
8516 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
8517 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
8518 Expression::Div(ref op)
8519 if matches!(
8520 source,
8521 DialectType::MySQL
8522 | DialectType::DuckDB
8523 | DialectType::SingleStore
8524 | DialectType::TiDB
8525 | DialectType::ClickHouse
8526 | DialectType::Doris
8527 ) && matches!(
8528 target,
8529 DialectType::PostgreSQL
8530 | DialectType::Redshift
8531 | DialectType::Drill
8532 | DialectType::Trino
8533 | DialectType::Presto
8534 | DialectType::Athena
8535 | DialectType::TSQL
8536 | DialectType::Teradata
8537 | DialectType::SQLite
8538 | DialectType::BigQuery
8539 | DialectType::Snowflake
8540 | DialectType::Databricks
8541 | DialectType::Oracle
8542 | DialectType::Materialize
8543 | DialectType::RisingWave
8544 ) =>
8545 {
8546 // Only wrap if RHS is not already NULLIF
8547 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
8548 {
8549 Action::MySQLSafeDivide
8550 } else {
8551 Action::None
8552 }
8553 }
8554 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
8555 // For TSQL/Fabric, convert to sp_rename instead
8556 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
8557 if let Some(crate::expressions::AlterTableAction::RenameTable(
8558 ref new_tbl,
8559 )) = at.actions.first()
8560 {
8561 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
8562 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
8563 Action::AlterTableToSpRename
8564 } else if new_tbl.schema.is_some()
8565 && matches!(
8566 target,
8567 DialectType::BigQuery
8568 | DialectType::Doris
8569 | DialectType::StarRocks
8570 | DialectType::DuckDB
8571 | DialectType::PostgreSQL
8572 | DialectType::Redshift
8573 )
8574 {
8575 Action::AlterTableRenameStripSchema
8576 } else {
8577 Action::None
8578 }
8579 } else {
8580 Action::None
8581 }
8582 }
8583 // EPOCH(x) expression -> target-specific epoch conversion
8584 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
8585 Action::EpochConvert
8586 }
8587 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
8588 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
8589 Action::EpochMsConvert
8590 }
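8591 // STRING_AGG conversion (e.g. GROUP_CONCAT for MySQL/SQLite); applied to MySQL, SingleStore, Doris, StarRocks, SQLite and Spark/Databricks targets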
8592 Expression::StringAgg(_) => {
8593 if matches!(
8594 target,
8595 DialectType::MySQL
8596 | DialectType::SingleStore
8597 | DialectType::Doris
8598 | DialectType::StarRocks
8599 | DialectType::SQLite
8600 ) {
8601 Action::StringAggConvert
8602 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
8603 Action::StringAggConvert
8604 } else {
8605 Action::None
8606 }
8607 }
8608 Expression::CombinedParameterizedAgg(_) => Action::GenericFunctionNormalize,
8609 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
8610 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
8611 Expression::GroupConcat(_) => Action::GroupConcatConvert,
8612 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
8613 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
8614 Expression::Cardinality(_)
8615 if matches!(source, DialectType::DuckDB)
8616 && matches!(target, DialectType::DuckDB) =>
8617 {
8618 Action::None
8619 }
8620 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
8621 Action::ArrayLengthConvert
8622 }
8623 Expression::ArraySize(_) => {
8624 if matches!(target, DialectType::Drill) {
8625 Action::ArraySizeDrill
8626 } else {
8627 Action::ArrayLengthConvert
8628 }
8629 }
8630 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
8631 Expression::ArrayRemove(_) => match target {
8632 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
8633 Action::ArrayRemoveConvert
8634 }
8635 _ => Action::None,
8636 },
8637 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
8638 Expression::ArrayReverse(_) => match target {
8639 DialectType::ClickHouse => Action::ArrayReverseConvert,
8640 _ => Action::None,
8641 },
8642 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
8643 Expression::JsonKeys(_) => match target {
8644 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
8645 Action::JsonKeysConvert
8646 }
8647 _ => Action::None,
8648 },
8649 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
8650 Expression::ParseJson(_) => match target {
8651 DialectType::SQLite
8652 | DialectType::Doris
8653 | DialectType::MySQL
8654 | DialectType::StarRocks => Action::ParseJsonStrip,
8655 _ => Action::None,
8656 },
8657 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
8658 Expression::WeekOfYear(_)
8659 if matches!(target, DialectType::Snowflake)
8660 && !matches!(source, DialectType::Snowflake) =>
8661 {
8662 Action::WeekOfYearToWeekIso
8663 }
8664 // NVL: clear original_name so generator uses dialect-specific function names
8665 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
8666 // XOR: expand for dialects that don't support the XOR keyword
8667 Expression::Xor(_) => {
8668 let target_supports_xor = matches!(
8669 target,
8670 DialectType::MySQL
8671 | DialectType::SingleStore
8672 | DialectType::Doris
8673 | DialectType::StarRocks
8674 );
8675 if !target_supports_xor {
8676 Action::XorExpand
8677 } else {
8678 Action::None
8679 }
8680 }
8681 // TSQL #table -> temp table normalization (CREATE TABLE)
8682 Expression::CreateTable(ct)
8683 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8684 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8685 && ct.name.name.name.starts_with('#') =>
8686 {
8687 Action::TempTableHash
8688 }
8689 // TSQL #table -> strip # from table references in SELECT/etc.
8690 Expression::Table(tr)
8691 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8692 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8693 && tr.name.name.starts_with('#') =>
8694 {
8695 Action::TempTableHash
8696 }
8697 // TSQL #table -> strip # from DROP TABLE names
8698 Expression::DropTable(ref dt)
8699 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8700 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8701 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
8702 {
8703 Action::TempTableHash
8704 }
8705 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8706 Expression::JsonExtract(_)
8707 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8708 {
8709 Action::JsonExtractToTsql
8710 }
8711 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8712 Expression::JsonExtractScalar(_)
8713 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8714 {
8715 Action::JsonExtractToTsql
8716 }
8717 // JSON_EXTRACT -> JSONExtractString for ClickHouse
8718 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
8719 Action::JsonExtractToClickHouse
8720 }
8721 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
8722 Expression::JsonExtractScalar(_)
8723 if matches!(target, DialectType::ClickHouse) =>
8724 {
8725 Action::JsonExtractToClickHouse
8726 }
8727 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
8728 Expression::JsonExtract(ref f)
8729 if !f.arrow_syntax
8730 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
8731 {
8732 Action::JsonExtractToArrow
8733 }
8734 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
8735 Expression::JsonExtract(ref f)
8736 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
8737 && !matches!(
8738 source,
8739 DialectType::PostgreSQL
8740 | DialectType::Redshift
8741 | DialectType::Materialize
8742 )
8743 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
8744 {
8745 Action::JsonExtractToGetJsonObject
8746 }
8747 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
8748 Expression::JsonExtract(_)
8749 if matches!(
8750 target,
8751 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8752 ) =>
8753 {
8754 Action::JsonExtractToGetJsonObject
8755 }
8756 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Redshift, Snowflake, SQLite, DuckDB
8757 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
8758 Expression::JsonExtractScalar(ref f)
8759 if !f.arrow_syntax
8760 && !f.hash_arrow_syntax
8761 && matches!(
8762 target,
8763 DialectType::PostgreSQL
8764 | DialectType::Redshift
8765 | DialectType::Snowflake
8766 | DialectType::SQLite
8767 | DialectType::DuckDB
8768 ) =>
8769 {
8770 Action::JsonExtractScalarConvert
8771 }
8772 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
8773 Expression::JsonExtractScalar(_)
8774 if matches!(
8775 target,
8776 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8777 ) =>
8778 {
8779 Action::JsonExtractScalarToGetJsonObject
8780 }
8781 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
8782 Expression::JsonExtract(ref f)
8783 if !f.arrow_syntax
8784 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
8785 {
8786 Action::JsonPathNormalize
8787 }
8788 // JsonQuery (parsed JSON_QUERY) -> target-specific
8789 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
8790 // JsonValue (parsed JSON_VALUE) -> target-specific
8791 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
8792 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
8793 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
8794 Expression::AtTimeZone(_)
8795 if matches!(
8796 target,
8797 DialectType::Presto
8798 | DialectType::Trino
8799 | DialectType::Athena
8800 | DialectType::Spark
8801 | DialectType::Databricks
8802 | DialectType::BigQuery
8803 | DialectType::Snowflake
8804 ) =>
8805 {
8806 Action::AtTimeZoneConvert
8807 }
8808 // DAY_OF_WEEK -> dialect-specific
8809 Expression::DayOfWeek(_)
8810 if matches!(
8811 target,
8812 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
8813 ) =>
8814 {
8815 Action::DayOfWeekConvert
8816 }
8817 // CURRENT_USER -> CURRENT_USER() for Snowflake
8818 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
8819 Action::CurrentUserParens
8820 }
8821 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
8822 Expression::ElementAt(_)
8823 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
8824 {
8825 Action::ElementAtConvert
8826 }
8827 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
8828 Expression::ArrayFunc(ref arr)
8829 if !arr.bracket_notation
8830 && matches!(
8831 target,
8832 DialectType::Spark
8833 | DialectType::Databricks
8834 | DialectType::Hive
8835 | DialectType::BigQuery
8836 | DialectType::DuckDB
8837 | DialectType::Snowflake
8838 | DialectType::Presto
8839 | DialectType::Trino
8840 | DialectType::Athena
8841 | DialectType::ClickHouse
8842 | DialectType::StarRocks
8843 ) =>
8844 {
8845 Action::ArraySyntaxConvert
8846 }
8847 // VARIANCE expression -> varSamp for ClickHouse
8848 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
8849 Action::VarianceToClickHouse
8850 }
8851 // STDDEV expression -> stddevSamp for ClickHouse
8852 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
8853 Action::StddevToClickHouse
8854 }
8855 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
8856 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
8857 Action::ApproxQuantileConvert
8858 }
8859 // MonthsBetween -> target-specific
8860 Expression::MonthsBetween(_)
8861 if !matches!(
8862 target,
8863 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8864 ) =>
8865 {
8866 Action::MonthsBetweenConvert
8867 }
8868 // AddMonths -> target-specific DATEADD/DATE_ADD
8869 Expression::AddMonths(_) => Action::AddMonthsConvert,
8870 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
8871 Expression::MapFromArrays(_)
8872 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
8873 {
8874 Action::MapFromArraysConvert
8875 }
8876 // CURRENT_USER -> CURRENT_USER() for Spark
8877 Expression::CurrentUser(_)
8878 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
8879 {
8880 Action::CurrentUserSparkParens
8881 }
8882 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
8883 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
8884 if matches!(
8885 source,
8886 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8887 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
8888 && matches!(
8889 target,
8890 DialectType::DuckDB
8891 | DialectType::Presto
8892 | DialectType::Trino
8893 | DialectType::Athena
8894 | DialectType::PostgreSQL
8895 | DialectType::Redshift
8896 ) =>
8897 {
8898 Action::SparkDateFuncCast
8899 }
8900 // $parameter -> @parameter for BigQuery
8901 Expression::Parameter(ref p)
8902 if matches!(target, DialectType::BigQuery)
8903 && matches!(source, DialectType::DuckDB)
8904 && (p.style == crate::expressions::ParameterStyle::Dollar
8905 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
8906 {
8907 Action::DollarParamConvert
8908 }
8909 // EscapeString literal containing a raw newline, carriage return, or tab: normalize to escape sequences (e.g. \n)
8910 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
8911 =>
8912 {
8913 Action::EscapeStringNormalize
8914 }
8915 // straight_join: keep lowercase for DuckDB, quote for MySQL
8916 Expression::Column(ref col)
8917 if col.name.name == "STRAIGHT_JOIN"
8918 && col.table.is_none()
8919 && matches!(source, DialectType::DuckDB)
8920 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
8921 {
8922 Action::StraightJoinCase
8923 }
8924 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
8925 // INTERVAL format for Snowflake/PostgreSQL/Redshift targets: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
8926 Expression::Interval(ref iv)
8927 if matches!(
8928 target,
8929 DialectType::Snowflake
8930 | DialectType::PostgreSQL
8931 | DialectType::Redshift
8932 ) && iv.unit.is_some()
8933 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
8934 {
8935 Action::SnowflakeIntervalFormat
8936 }
8937 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
8938 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
8939 if let Some(ref sample) = ts.sample {
8940 if !sample.explicit_method {
8941 Action::TablesampleReservoir
8942 } else {
8943 Action::None
8944 }
8945 } else {
8946 Action::None
8947 }
8948 }
8949 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
8950 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
8951 Expression::TableSample(ref ts)
8952 if matches!(target, DialectType::Snowflake)
8953 && !matches!(source, DialectType::Snowflake)
8954 && ts.sample.is_some() =>
8955 {
8956 if let Some(ref sample) = ts.sample {
8957 if !sample.explicit_method {
8958 Action::TablesampleSnowflakeStrip
8959 } else {
8960 Action::None
8961 }
8962 } else {
8963 Action::None
8964 }
8965 }
8966 Expression::Table(ref t)
8967 if matches!(target, DialectType::Snowflake)
8968 && !matches!(source, DialectType::Snowflake)
8969 && t.table_sample.is_some() =>
8970 {
8971 if let Some(ref sample) = t.table_sample {
8972 if !sample.explicit_method {
8973 Action::TablesampleSnowflakeStrip
8974 } else {
8975 Action::None
8976 }
8977 } else {
8978 Action::None
8979 }
8980 }
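8981 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL/Fabric. NOTE(review): appears unreachable - the earlier AlterTable arm (guarded only by non-empty actions) already returns AlterTableToSpRename for this case.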
8982 Expression::AlterTable(ref at)
8983 if matches!(target, DialectType::TSQL | DialectType::Fabric)
8984 && !at.actions.is_empty()
8985 && matches!(
8986 at.actions.first(),
8987 Some(crate::expressions::AlterTableAction::RenameTable(_))
8988 ) =>
8989 {
8990 Action::AlterTableToSpRename
8991 }
8992 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
8993 Expression::Subscript(ref sub)
8994 if matches!(
8995 target,
8996 DialectType::BigQuery
8997 | DialectType::Hive
8998 | DialectType::Spark
8999 | DialectType::Databricks
9000 ) && matches!(
9001 source,
9002 DialectType::DuckDB
9003 | DialectType::PostgreSQL
9004 | DialectType::Presto
9005 | DialectType::Trino
9006 | DialectType::Redshift
9007 | DialectType::ClickHouse
9008 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
9009 {
9010 Action::ArrayIndexConvert
9011 }
9012 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
9013 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
9014 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
9015 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
9016 Expression::WindowFunction(ref wf) => {
9017 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
9018 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
9019 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
9020 if matches!(target, DialectType::BigQuery)
9021 && !is_row_number
9022 && !wf.over.order_by.is_empty()
9023 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
9024 {
9025 Action::BigQueryNullsOrdering
9026 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
9027 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
9028 } else {
9029 let source_nulls_last = matches!(source, DialectType::DuckDB);
9030 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
9031 matches!(
9032 f.kind,
9033 crate::expressions::WindowFrameKind::Range
9034 | crate::expressions::WindowFrameKind::Groups
9035 )
9036 });
9037 if source_nulls_last
9038 && matches!(target, DialectType::MySQL)
9039 && !wf.over.order_by.is_empty()
9040 && wf.over.order_by.iter().any(|o| !o.desc)
9041 && !has_range_frame
9042 {
9043 Action::MysqlNullsLastRewrite
9044 } else {
9045 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
9046 let is_ranking_window_func = matches!(
9047 &wf.this,
9048 Expression::FirstValue(_)
9049 | Expression::LastValue(_)
9050 | Expression::NthValue(_)
9051 );
9052 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
9053 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
9054 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
9055 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
9056 && f.exclude.is_none()
9057 });
9058 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
9059 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
9060 // Strip the default frame for Snowflake target
9061 Action::SnowflakeWindowFrameStrip
9062 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
9063 // Add default frame for non-Snowflake target
9064 Action::SnowflakeWindowFrameAdd
9065 } else {
9066 match &wf.this {
9067 Expression::FirstValue(ref vf)
9068 | Expression::LastValue(ref vf)
9069 if vf.ignore_nulls == Some(false) =>
9070 {
9071 match target {
9072 DialectType::SQLite => Action::RespectNullsConvert,
9073 _ => Action::None,
9074 }
9075 }
9076 _ => Action::None,
9077 }
9078 }
9079 } else {
9080 match &wf.this {
9081 Expression::FirstValue(ref vf)
9082 | Expression::LastValue(ref vf)
9083 if vf.ignore_nulls == Some(false) =>
9084 {
9085 // RESPECT NULLS
9086 match target {
9087 DialectType::SQLite | DialectType::PostgreSQL => {
9088 Action::RespectNullsConvert
9089 }
9090 _ => Action::None,
9091 }
9092 }
9093 _ => Action::None,
9094 }
9095 }
9096 }
9097 }
9098 }
9099 // CREATE TABLE a LIKE b -> dialect-specific transformations
9100 Expression::CreateTable(ref ct)
9101 if ct.columns.is_empty()
9102 && ct.constraints.iter().any(|c| {
9103 matches!(c, crate::expressions::TableConstraint::Like { .. })
9104 })
9105 && matches!(
9106 target,
9107 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
9108 ) =>
9109 {
9110 Action::CreateTableLikeToCtas
9111 }
9112 Expression::CreateTable(ref ct)
9113 if ct.columns.is_empty()
9114 && ct.constraints.iter().any(|c| {
9115 matches!(c, crate::expressions::TableConstraint::Like { .. })
9116 })
9117 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
9118 {
9119 Action::CreateTableLikeToSelectInto
9120 }
9121 Expression::CreateTable(ref ct)
9122 if ct.columns.is_empty()
9123 && ct.constraints.iter().any(|c| {
9124 matches!(c, crate::expressions::TableConstraint::Like { .. })
9125 })
9126 && matches!(target, DialectType::ClickHouse) =>
9127 {
9128 Action::CreateTableLikeToAs
9129 }
9130 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
9131 Expression::CreateTable(ref ct)
9132 if matches!(target, DialectType::DuckDB)
9133 && matches!(
9134 source,
9135 DialectType::DuckDB
9136 | DialectType::Spark
9137 | DialectType::Databricks
9138 | DialectType::Hive
9139 ) =>
9140 {
9141 let has_comment = ct.columns.iter().any(|c| {
9142 c.comment.is_some()
9143 || c.constraints.iter().any(|con| {
9144 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
9145 })
9146 });
9147 let has_props = !ct.properties.is_empty();
9148 if has_comment || has_props {
9149 Action::CreateTableStripComment
9150 } else {
9151 Action::None
9152 }
9153 }
9154 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL/Redshift
9155 Expression::Array(_)
9156 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
9157 {
9158 Action::ArrayConcatBracketConvert
9159 }
9160 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
9161 Expression::ArrayFunc(ref arr)
9162 if arr.bracket_notation
9163 && matches!(source, DialectType::BigQuery)
9164 && matches!(target, DialectType::Redshift) =>
9165 {
9166 Action::ArrayConcatBracketConvert
9167 }
9168 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
9169 Expression::BitwiseOrAgg(ref f)
9170 | Expression::BitwiseAndAgg(ref f)
9171 | Expression::BitwiseXorAgg(ref f) => {
9172 if matches!(target, DialectType::DuckDB) {
9173 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
9174 if let Expression::Cast(ref c) = f.this {
9175 match &c.to {
9176 DataType::Float { .. }
9177 | DataType::Double { .. }
9178 | DataType::Decimal { .. } => Action::BitAggFloatCast,
9179 DataType::Custom { ref name }
9180 if name.eq_ignore_ascii_case("REAL") =>
9181 {
9182 Action::BitAggFloatCast
9183 }
9184 _ => Action::None,
9185 }
9186 } else {
9187 Action::None
9188 }
9189 } else if matches!(target, DialectType::Snowflake) {
9190 Action::BitAggSnowflakeRename
9191 } else {
9192 Action::None
9193 }
9194 }
9195 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
9196 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
9197 Action::FilterToIff
9198 }
9199 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
9200 Expression::Avg(ref f)
9201 | Expression::Sum(ref f)
9202 | Expression::Min(ref f)
9203 | Expression::Max(ref f)
9204 | Expression::CountIf(ref f)
9205 | Expression::Stddev(ref f)
9206 | Expression::StddevPop(ref f)
9207 | Expression::StddevSamp(ref f)
9208 | Expression::Variance(ref f)
9209 | Expression::VarPop(ref f)
9210 | Expression::VarSamp(ref f)
9211 | Expression::Median(ref f)
9212 | Expression::Mode(ref f)
9213 | Expression::First(ref f)
9214 | Expression::Last(ref f)
9215 | Expression::ApproxDistinct(ref f)
9216 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
9217 {
9218 Action::AggFilterToIff
9219 }
9220 Expression::Count(ref c)
9221 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
9222 {
9223 Action::AggFilterToIff
9224 }
9225 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
9226 Expression::Count(ref c)
9227 if c.distinct
9228 && matches!(&c.this, Some(Expression::Tuple(_)))
9229 && matches!(
9230 target,
9231 DialectType::Presto
9232 | DialectType::Trino
9233 | DialectType::DuckDB
9234 | DialectType::PostgreSQL
9235 ) =>
9236 {
9237 Action::CountDistinctMultiArg
9238 }
9239 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
9240 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
9241 Action::JsonToGetPath
9242 }
9243 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
9244 Expression::Struct(_)
9245 if matches!(
9246 target,
9247 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
9248 ) && matches!(source, DialectType::DuckDB) =>
9249 {
9250 Action::StructToRow
9251 }
9252 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
9253 Expression::MapFunc(ref m)
9254 if m.curly_brace_syntax
9255 && matches!(
9256 target,
9257 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
9258 )
9259 && matches!(source, DialectType::DuckDB) =>
9260 {
9261 Action::StructToRow
9262 }
9263 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino/Athena
9264 Expression::ApproxCountDistinct(_)
9265 if matches!(
9266 target,
9267 DialectType::Presto | DialectType::Trino | DialectType::Athena
9268 ) =>
9269 {
9270 Action::ApproxCountDistinctToApproxDistinct
9271 }
9272 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
9273 Expression::ArrayContains(_)
9274 if matches!(
9275 target,
9276 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
9277 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
9278 {
9279 Action::ArrayContainsConvert
9280 }
9281 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
9282 Expression::ArrayContains(_)
9283 if matches!(target, DialectType::DuckDB)
9284 && matches!(source, DialectType::Snowflake) =>
9285 {
9286 Action::ArrayContainsDuckDBConvert
9287 }
9288 // ARRAY_EXCEPT -> target-specific conversion
9289 Expression::ArrayExcept(_)
9290 if matches!(
9291 target,
9292 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
9293 ) =>
9294 {
9295 Action::ArrayExceptConvert
9296 }
9297 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
9298 Expression::ArrayPosition(_)
9299 if matches!(target, DialectType::Snowflake)
9300 && !matches!(source, DialectType::Snowflake) =>
9301 {
9302 Action::ArrayPositionSnowflakeSwap
9303 }
9304 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
9305 Expression::ArrayPosition(_)
9306 if matches!(target, DialectType::DuckDB)
9307 && matches!(source, DialectType::Snowflake) =>
9308 {
9309 Action::SnowflakeArrayPositionToDuckDB
9310 }
9311 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
9312 Expression::ArrayDistinct(_)
9313 if matches!(target, DialectType::ClickHouse) =>
9314 {
9315 Action::ArrayDistinctClickHouse
9316 }
9317 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
9318 Expression::ArrayDistinct(_)
9319 if matches!(target, DialectType::DuckDB)
9320 && matches!(source, DialectType::Snowflake) =>
9321 {
9322 Action::ArrayDistinctConvert
9323 }
9324 // StrPosition with position -> complex expansion for Presto/DuckDB
9325 // STRPOS doesn't support a position arg in these dialects
9326 Expression::StrPosition(ref sp)
9327 if sp.position.is_some()
9328 && matches!(
9329 target,
9330 DialectType::Presto
9331 | DialectType::Trino
9332 | DialectType::Athena
9333 | DialectType::DuckDB
9334 ) =>
9335 {
9336 Action::StrPositionExpand
9337 }
9338 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
9339 Expression::First(ref f)
9340 if f.ignore_nulls == Some(true)
9341 && matches!(target, DialectType::DuckDB) =>
9342 {
9343 Action::FirstToAnyValue
9344 }
9345 // BEGIN -> START TRANSACTION for Presto/Trino/Athena
9346 Expression::Command(ref cmd)
9347 if cmd.this.eq_ignore_ascii_case("BEGIN")
9348 && matches!(
9349 target,
9350 DialectType::Presto | DialectType::Trino | DialectType::Athena
9351 ) =>
9352 {
9353 // Handled inline below
9354 Action::None // We'll handle it directly
9355 }
9356 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
9357 // PostgreSQL # is parsed as BitwiseXor (which is correct).
9358 // a || b (Concat operator) -> CONCAT function for Presto/Trino
9359 Expression::Concat(ref _op)
9360 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9361 && matches!(target, DialectType::Presto | DialectType::Trino) =>
9362 {
9363 Action::PipeConcatToConcat
9364 }
9365 _ => Action::None,
9366 }
9367 };
9368
9369 match action {
9370 Action::None => {
9371 // Handle inline transforms that don't need a dedicated action
9372 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
9373 if let Some(rewritten) = Self::rewrite_tsql_interval_arithmetic(&e) {
9374 return Ok(rewritten);
9375 }
9376 }
9377
9378 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
9379 if let Expression::Between(ref b) = e {
9380 if let Some(sym) = b.symmetric {
9381 let keeps_symmetric =
9382 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
9383 if !keeps_symmetric {
9384 if sym {
9385 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
9386 let b = if let Expression::Between(b) = e {
9387 *b
9388 } else {
9389 unreachable!()
9390 };
9391 let between1 = Expression::Between(Box::new(
9392 crate::expressions::Between {
9393 this: b.this.clone(),
9394 low: b.low.clone(),
9395 high: b.high.clone(),
9396 not: b.not,
9397 symmetric: None,
9398 },
9399 ));
9400 let between2 = Expression::Between(Box::new(
9401 crate::expressions::Between {
9402 this: b.this,
9403 low: b.high,
9404 high: b.low,
9405 not: b.not,
9406 symmetric: None,
9407 },
9408 ));
9409 return Ok(Expression::Paren(Box::new(
9410 crate::expressions::Paren {
9411 this: Expression::Or(Box::new(
9412 crate::expressions::BinaryOp::new(
9413 between1, between2,
9414 ),
9415 )),
9416 trailing_comments: vec![],
9417 },
9418 )));
9419 } else {
9420 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
9421 let b = if let Expression::Between(b) = e {
9422 *b
9423 } else {
9424 unreachable!()
9425 };
9426 return Ok(Expression::Between(Box::new(
9427 crate::expressions::Between {
9428 this: b.this,
9429 low: b.low,
9430 high: b.high,
9431 not: b.not,
9432 symmetric: None,
9433 },
9434 )));
9435 }
9436 }
9437 }
9438 }
9439
9440 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
9441 if let Expression::ILike(ref _like) = e {
9442 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
9443 let like = if let Expression::ILike(l) = e {
9444 *l
9445 } else {
9446 unreachable!()
9447 };
9448 let lower_left = Expression::Function(Box::new(Function::new(
9449 "LOWER".to_string(),
9450 vec![like.left],
9451 )));
9452 let lower_right = Expression::Function(Box::new(Function::new(
9453 "LOWER".to_string(),
9454 vec![like.right],
9455 )));
9456 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
9457 left: lower_left,
9458 right: lower_right,
9459 escape: like.escape,
9460 quantifier: like.quantifier,
9461 inferred_type: None,
9462 })));
9463 }
9464 }
9465
9466 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL/Redshift/DuckDB/SQLite, DBMS_RANDOM.VALUE() kept for Oracle, RAND() for others
9467 if let Expression::MethodCall(ref mc) = e {
9468 if matches!(source, DialectType::Oracle)
9469 && mc.method.name.eq_ignore_ascii_case("VALUE")
9470 && mc.args.is_empty()
9471 {
9472 let is_dbms_random = match &mc.this {
9473 Expression::Identifier(id) => {
9474 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
9475 }
9476 Expression::Column(col) => {
9477 col.table.is_none()
9478 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
9479 }
9480 _ => false,
9481 };
9482 if is_dbms_random {
9483 let func_name = match target {
9484 DialectType::PostgreSQL
9485 | DialectType::Redshift
9486 | DialectType::DuckDB
9487 | DialectType::SQLite => "RANDOM",
9488 DialectType::Oracle => "DBMS_RANDOM.VALUE",
9489 _ => "RAND",
9490 };
9491 return Ok(Expression::Function(Box::new(Function::new(
9492 func_name.to_string(),
9493 vec![],
9494 ))));
9495 }
9496 }
9497 }
9498 // TRIM without explicit position -> add BOTH for ClickHouse
9499 if let Expression::Trim(ref trim) = e {
9500 if matches!(target, DialectType::ClickHouse)
9501 && trim.sql_standard_syntax
9502 && trim.characters.is_some()
9503 && !trim.position_explicit
9504 {
9505 let mut new_trim = (**trim).clone();
9506 new_trim.position_explicit = true;
9507 return Ok(Expression::Trim(Box::new(new_trim)));
9508 }
9509 }
9510 // BEGIN -> START TRANSACTION for Presto/Trino/Athena
9511 if let Expression::Transaction(ref txn) = e {
9512 if matches!(
9513 target,
9514 DialectType::Presto | DialectType::Trino | DialectType::Athena
9515 ) {
9516 // Convert BEGIN to START TRANSACTION by setting mark to "START"
9517 let mut txn = txn.clone();
9518 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
9519 "START".to_string(),
9520 ))));
9521 return Ok(Expression::Transaction(Box::new(*txn)));
9522 }
9523 }
9524 // IS TRUE/FALSE -> simplified forms for Presto/Trino/Athena
9525 if matches!(
9526 target,
9527 DialectType::Presto | DialectType::Trino | DialectType::Athena
9528 ) {
9529 match &e {
9530 Expression::IsTrue(itf) if !itf.not => {
9531 // x IS TRUE -> x
9532 return Ok(itf.this.clone());
9533 }
9534 Expression::IsTrue(itf) if itf.not => {
9535 // x IS NOT TRUE -> NOT x
9536 return Ok(Expression::Not(Box::new(
9537 crate::expressions::UnaryOp {
9538 this: itf.this.clone(),
9539 inferred_type: None,
9540 },
9541 )));
9542 }
9543 Expression::IsFalse(itf) if !itf.not => {
9544 // x IS FALSE -> NOT x
9545 return Ok(Expression::Not(Box::new(
9546 crate::expressions::UnaryOp {
9547 this: itf.this.clone(),
9548 inferred_type: None,
9549 },
9550 )));
9551 }
9552 Expression::IsFalse(itf) if itf.not => {
9553 // x IS NOT FALSE -> NOT NOT x
9554 let not_x =
9555 Expression::Not(Box::new(crate::expressions::UnaryOp {
9556 this: itf.this.clone(),
9557 inferred_type: None,
9558 }));
9559 return Ok(Expression::Not(Box::new(
9560 crate::expressions::UnaryOp {
9561 this: not_x,
9562 inferred_type: None,
9563 },
9564 )));
9565 }
9566 _ => {}
9567 }
9568 }
9569 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
9570 if matches!(target, DialectType::Redshift) {
9571 if let Expression::IsFalse(ref itf) = e {
9572 if itf.not {
9573 return Ok(Expression::Not(Box::new(
9574 crate::expressions::UnaryOp {
9575 this: Expression::IsFalse(Box::new(
9576 crate::expressions::IsTrueFalse {
9577 this: itf.this.clone(),
9578 not: false,
9579 },
9580 )),
9581 inferred_type: None,
9582 },
9583 )));
9584 }
9585 }
9586 }
9587 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
9588 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
9589 if let Expression::Function(ref f) = e {
9590 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
9591 && matches!(source, DialectType::Snowflake)
9592 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
9593 {
9594 if f.args.len() == 3 {
9595 let mut args = f.args.clone();
9596 args.push(Expression::string("g"));
9597 return Ok(Expression::Function(Box::new(Function::new(
9598 "REGEXP_REPLACE".to_string(),
9599 args,
9600 ))));
9601 } else if f.args.len() == 4 {
9602 // 4th arg might be position, add 'g' as 5th
9603 let mut args = f.args.clone();
9604 args.push(Expression::string("g"));
9605 return Ok(Expression::Function(Box::new(Function::new(
9606 "REGEXP_REPLACE".to_string(),
9607 args,
9608 ))));
9609 }
9610 }
9611 }
9612 Ok(e)
9613 }
9614
9615 Action::GreatestLeastNull => {
9616 let f = if let Expression::Function(f) = e {
9617 *f
9618 } else {
9619 unreachable!("action only triggered for Function expressions")
9620 };
9621 let mut null_checks: Vec<Expression> = f
9622 .args
9623 .iter()
9624 .map(|a| {
9625 Expression::IsNull(Box::new(IsNull {
9626 this: a.clone(),
9627 not: false,
9628 postfix_form: false,
9629 }))
9630 })
9631 .collect();
9632 let condition = if null_checks.len() == 1 {
9633 null_checks.remove(0)
9634 } else {
9635 let first = null_checks.remove(0);
9636 null_checks.into_iter().fold(first, |acc, check| {
9637 Expression::Or(Box::new(BinaryOp::new(acc, check)))
9638 })
9639 };
9640 Ok(Expression::Case(Box::new(Case {
9641 operand: None,
9642 whens: vec![(condition, Expression::Null(Null))],
9643 else_: Some(Expression::Function(Box::new(Function::new(
9644 f.name, f.args,
9645 )))),
9646 comments: Vec::new(),
9647 inferred_type: None,
9648 })))
9649 }
9650
9651 Action::ArrayGenerateRange => {
9652 let f = if let Expression::Function(f) = e {
9653 *f
9654 } else {
9655 unreachable!("action only triggered for Function expressions")
9656 };
9657 let start = f.args[0].clone();
9658 let end = f.args[1].clone();
9659 let step = f.args.get(2).cloned();
9660
9661 // Helper: compute end - 1 for converting exclusive→inclusive end.
9662 // When end is a literal number, simplify to a computed literal.
9663 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
9664 // Try to simplify literal numbers
9665 match end {
9666 Expression::Literal(lit)
9667 if matches!(lit.as_ref(), Literal::Number(_)) =>
9668 {
9669 let Literal::Number(n) = lit.as_ref() else {
9670 unreachable!()
9671 };
9672 if let Ok(val) = n.parse::<i64>() {
9673 return Expression::number(val - 1);
9674 }
9675 }
9676 Expression::Neg(u) => {
9677 if let Expression::Literal(lit) = &u.this {
9678 if let Literal::Number(n) = lit.as_ref() {
9679 if let Ok(val) = n.parse::<i64>() {
9680 return Expression::number(-val - 1);
9681 }
9682 }
9683 }
9684 }
9685 _ => {}
9686 }
9687 // Non-literal: produce end - 1 expression
9688 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
9689 }
9690
9691 match target {
9692 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
9693 // so no adjustment needed — just rename the function.
9694 DialectType::Snowflake => {
9695 let mut args = vec![start, end];
9696 if let Some(s) = step {
9697 args.push(s);
9698 }
9699 Ok(Expression::Function(Box::new(Function::new(
9700 "ARRAY_GENERATE_RANGE".to_string(),
9701 args,
9702 ))))
9703 }
9704 DialectType::DuckDB => {
9705 let mut args = vec![start, end];
9706 if let Some(s) = step {
9707 args.push(s);
9708 }
9709 Ok(Expression::Function(Box::new(Function::new(
9710 "RANGE".to_string(),
9711 args,
9712 ))))
9713 }
9714 // These dialects use inclusive end, so convert exclusive→inclusive.
9715 // Presto/Trino: simplify literal numbers (3 → 2).
9716 DialectType::Presto | DialectType::Trino => {
9717 let end_inclusive = exclusive_to_inclusive_end(&end);
9718 let mut args = vec![start, end_inclusive];
9719 if let Some(s) = step {
9720 args.push(s);
9721 }
9722 Ok(Expression::Function(Box::new(Function::new(
9723 "SEQUENCE".to_string(),
9724 args,
9725 ))))
9726 }
9727 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
9728 DialectType::PostgreSQL | DialectType::Redshift => {
9729 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
9730 end.clone(),
9731 Expression::number(1),
9732 )));
9733 let mut args = vec![start, end_minus_1];
9734 if let Some(s) = step {
9735 args.push(s);
9736 }
9737 Ok(Expression::Function(Box::new(Function::new(
9738 "GENERATE_SERIES".to_string(),
9739 args,
9740 ))))
9741 }
9742 DialectType::BigQuery => {
9743 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
9744 end.clone(),
9745 Expression::number(1),
9746 )));
9747 let mut args = vec![start, end_minus_1];
9748 if let Some(s) = step {
9749 args.push(s);
9750 }
9751 Ok(Expression::Function(Box::new(Function::new(
9752 "GENERATE_ARRAY".to_string(),
9753 args,
9754 ))))
9755 }
9756 _ => Ok(Expression::Function(Box::new(Function::new(
9757 f.name, f.args,
9758 )))),
9759 }
9760 }
9761
9762 Action::Div0TypedDivision => {
9763 let if_func = if let Expression::IfFunc(f) = e {
9764 *f
9765 } else {
9766 unreachable!("action only triggered for IfFunc expressions")
9767 };
9768 if let Some(Expression::Div(div)) = if_func.false_value {
9769 let cast_type = if matches!(target, DialectType::SQLite) {
9770 DataType::Float {
9771 precision: None,
9772 scale: None,
9773 real_spelling: true,
9774 }
9775 } else {
9776 DataType::Double {
9777 precision: None,
9778 scale: None,
9779 }
9780 };
9781 let casted_left = Expression::Cast(Box::new(Cast {
9782 this: div.left,
9783 to: cast_type,
9784 trailing_comments: vec![],
9785 double_colon_syntax: false,
9786 format: None,
9787 default: None,
9788 inferred_type: None,
9789 }));
9790 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9791 condition: if_func.condition,
9792 true_value: if_func.true_value,
9793 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
9794 casted_left,
9795 div.right,
9796 )))),
9797 original_name: if_func.original_name,
9798 inferred_type: None,
9799 })))
9800 } else {
9801 // Not actually a Div, reconstruct
9802 Ok(Expression::IfFunc(Box::new(if_func)))
9803 }
9804 }
9805
9806 Action::ArrayAggCollectList => {
9807 let agg = if let Expression::ArrayAgg(a) = e {
9808 *a
9809 } else {
9810 unreachable!("action only triggered for ArrayAgg expressions")
9811 };
9812 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9813 name: Some("COLLECT_LIST".to_string()),
9814 ..agg
9815 })))
9816 }
9817
9818 Action::ArrayAggToGroupConcat => {
9819 let agg = if let Expression::ArrayAgg(a) = e {
9820 *a
9821 } else {
9822 unreachable!("action only triggered for ArrayAgg expressions")
9823 };
9824 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9825 name: Some("GROUP_CONCAT".to_string()),
9826 ..agg
9827 })))
9828 }
9829
9830 Action::ArrayAggWithinGroupFilter => {
9831 let wg = if let Expression::WithinGroup(w) = e {
9832 *w
9833 } else {
9834 unreachable!("action only triggered for WithinGroup expressions")
9835 };
9836 if let Expression::ArrayAgg(inner_agg) = wg.this {
9837 let col = inner_agg.this.clone();
9838 let filter = Expression::IsNull(Box::new(IsNull {
9839 this: col,
9840 not: true,
9841 postfix_form: false,
9842 }));
9843 // For DuckDB, add explicit NULLS FIRST for DESC ordering
9844 let order_by = if matches!(target, DialectType::DuckDB) {
9845 wg.order_by
9846 .into_iter()
9847 .map(|mut o| {
9848 if o.desc && o.nulls_first.is_none() {
9849 o.nulls_first = Some(true);
9850 }
9851 o
9852 })
9853 .collect()
9854 } else {
9855 wg.order_by
9856 };
9857 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9858 this: inner_agg.this,
9859 distinct: inner_agg.distinct,
9860 filter: Some(filter),
9861 order_by,
9862 name: inner_agg.name,
9863 ignore_nulls: inner_agg.ignore_nulls,
9864 having_max: inner_agg.having_max,
9865 limit: inner_agg.limit,
9866 inferred_type: None,
9867 })))
9868 } else {
9869 Ok(Expression::WithinGroup(Box::new(wg)))
9870 }
9871 }
9872
9873 Action::ArrayAggFilter => {
9874 let agg = if let Expression::ArrayAgg(a) = e {
9875 *a
9876 } else {
9877 unreachable!("action only triggered for ArrayAgg expressions")
9878 };
9879 let col = agg.this.clone();
9880 let filter = Expression::IsNull(Box::new(IsNull {
9881 this: col,
9882 not: true,
9883 postfix_form: false,
9884 }));
9885 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9886 filter: Some(filter),
9887 ..agg
9888 })))
9889 }
9890
9891 Action::ArrayAggNullFilter => {
9892 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
9893 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
9894 let agg = if let Expression::ArrayAgg(a) = e {
9895 *a
9896 } else {
9897 unreachable!("action only triggered for ArrayAgg expressions")
9898 };
9899 let col = agg.this.clone();
9900 let not_null = Expression::IsNull(Box::new(IsNull {
9901 this: col,
9902 not: true,
9903 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
9904 }));
9905 let new_filter = if let Some(existing_filter) = agg.filter {
9906 // AND the NOT IS NULL with existing filter
9907 Expression::And(Box::new(crate::expressions::BinaryOp::new(
9908 existing_filter,
9909 not_null,
9910 )))
9911 } else {
9912 not_null
9913 };
9914 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9915 filter: Some(new_filter),
9916 ..agg
9917 })))
9918 }
9919
9920 Action::BigQueryArraySelectAsStructToSnowflake => {
9921 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
9922 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
9923 if let Expression::Function(mut f) = e {
9924 let is_match = f.args.len() == 1
9925 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
9926 if is_match {
9927 let inner_select = match f.args.remove(0) {
9928 Expression::Select(s) => *s,
9929 _ => unreachable!(
9930 "argument already verified to be a Select expression"
9931 ),
9932 };
9933 // Build OBJECT_CONSTRUCT args from SELECT expressions
9934 let mut oc_args = Vec::new();
9935 for expr in &inner_select.expressions {
9936 match expr {
9937 Expression::Alias(a) => {
9938 let key = Expression::Literal(Box::new(Literal::String(
9939 a.alias.name.clone(),
9940 )));
9941 let value = a.this.clone();
9942 oc_args.push(key);
9943 oc_args.push(value);
9944 }
9945 Expression::Column(c) => {
9946 let key = Expression::Literal(Box::new(Literal::String(
9947 c.name.name.clone(),
9948 )));
9949 oc_args.push(key);
9950 oc_args.push(expr.clone());
9951 }
9952 _ => {
9953 oc_args.push(expr.clone());
9954 }
9955 }
9956 }
9957 let object_construct = Expression::Function(Box::new(Function::new(
9958 "OBJECT_CONSTRUCT".to_string(),
9959 oc_args,
9960 )));
9961 let array_agg = Expression::Function(Box::new(Function::new(
9962 "ARRAY_AGG".to_string(),
9963 vec![object_construct],
9964 )));
9965 let mut new_select = crate::expressions::Select::new();
9966 new_select.expressions = vec![array_agg];
9967 new_select.from = inner_select.from.clone();
9968 new_select.where_clause = inner_select.where_clause.clone();
9969 new_select.group_by = inner_select.group_by.clone();
9970 new_select.having = inner_select.having.clone();
9971 new_select.joins = inner_select.joins.clone();
9972 Ok(Expression::Subquery(Box::new(
9973 crate::expressions::Subquery {
9974 this: Expression::Select(Box::new(new_select)),
9975 alias: None,
9976 column_aliases: Vec::new(),
9977 alias_explicit_as: false,
9978 alias_keyword: None,
9979 order_by: None,
9980 limit: None,
9981 offset: None,
9982 distribute_by: None,
9983 sort_by: None,
9984 cluster_by: None,
9985 lateral: false,
9986 modifiers_inside: false,
9987 trailing_comments: Vec::new(),
9988 inferred_type: None,
9989 },
9990 )))
9991 } else {
9992 Ok(Expression::Function(f))
9993 }
9994 } else {
9995 Ok(e)
9996 }
9997 }
9998
9999 Action::BigQueryPercentileContToDuckDB => {
10000 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
10001 if let Expression::AggregateFunction(mut af) = e {
10002 af.name = "QUANTILE_CONT".to_string();
10003 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
10004 // Keep only first 2 args
10005 if af.args.len() > 2 {
10006 af.args.truncate(2);
10007 }
10008 Ok(Expression::AggregateFunction(af))
10009 } else {
10010 Ok(e)
10011 }
10012 }
10013
10014 Action::ArrayAggIgnoreNullsDuckDB => {
10015 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
10016 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
10017 let mut agg = if let Expression::ArrayAgg(a) = e {
10018 *a
10019 } else {
10020 unreachable!("action only triggered for ArrayAgg expressions")
10021 };
10022 agg.ignore_nulls = None; // Strip IGNORE NULLS
10023 if !agg.order_by.is_empty() {
10024 agg.order_by[0].nulls_first = Some(true);
10025 }
10026 Ok(Expression::ArrayAgg(Box::new(agg)))
10027 }
10028
10029 Action::CountDistinctMultiArg => {
10030 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
10031 if let Expression::Count(c) = e {
10032 if let Some(Expression::Tuple(t)) = c.this {
10033 let args = t.expressions;
10034 // Build CASE expression:
10035 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
10036 let mut whens = Vec::new();
10037 for arg in &args {
10038 whens.push((
10039 Expression::IsNull(Box::new(IsNull {
10040 this: arg.clone(),
10041 not: false,
10042 postfix_form: false,
10043 })),
10044 Expression::Null(crate::expressions::Null),
10045 ));
10046 }
10047 // Build the tuple for ELSE
10048 let tuple_expr =
10049 Expression::Tuple(Box::new(crate::expressions::Tuple {
10050 expressions: args,
10051 }));
10052 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
10053 operand: None,
10054 whens,
10055 else_: Some(tuple_expr),
10056 comments: Vec::new(),
10057 inferred_type: None,
10058 }));
10059 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
10060 this: Some(case_expr),
10061 star: false,
10062 distinct: true,
10063 filter: c.filter,
10064 ignore_nulls: c.ignore_nulls,
10065 original_name: c.original_name,
10066 inferred_type: None,
10067 })))
10068 } else {
10069 Ok(Expression::Count(c))
10070 }
10071 } else {
10072 Ok(e)
10073 }
10074 }
10075
10076 Action::CastTimestampToDatetime => {
10077 let c = if let Expression::Cast(c) = e {
10078 *c
10079 } else {
10080 unreachable!("action only triggered for Cast expressions")
10081 };
10082 Ok(Expression::Cast(Box::new(Cast {
10083 to: DataType::Custom {
10084 name: "DATETIME".to_string(),
10085 },
10086 ..c
10087 })))
10088 }
10089
10090 Action::CastTimestampStripTz => {
10091 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
10092 let c = if let Expression::Cast(c) = e {
10093 *c
10094 } else {
10095 unreachable!("action only triggered for Cast expressions")
10096 };
10097 Ok(Expression::Cast(Box::new(Cast {
10098 to: DataType::Timestamp {
10099 precision: None,
10100 timezone: false,
10101 },
10102 ..c
10103 })))
10104 }
10105
10106 Action::CastTimestamptzToFunc => {
10107 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
10108 let c = if let Expression::Cast(c) = e {
10109 *c
10110 } else {
10111 unreachable!("action only triggered for Cast expressions")
10112 };
10113 Ok(Expression::Function(Box::new(Function::new(
10114 "TIMESTAMP".to_string(),
10115 vec![c.this],
10116 ))))
10117 }
10118
10119 Action::ToDateToCast => {
10120 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
10121 if let Expression::Function(f) = e {
10122 let arg = f.args.into_iter().next().unwrap();
10123 Ok(Expression::Cast(Box::new(Cast {
10124 this: arg,
10125 to: DataType::Date,
10126 double_colon_syntax: false,
10127 trailing_comments: vec![],
10128 format: None,
10129 default: None,
10130 inferred_type: None,
10131 })))
10132 } else {
10133 Ok(e)
10134 }
10135 }
10136 Action::DateTruncWrapCast => {
10137 // Handle both Expression::DateTrunc/TimestampTrunc and
10138 // Expression::Function("DATE_TRUNC", [unit, expr])
10139 match e {
10140 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
10141 let input_type = match &d.this {
10142 Expression::Cast(c) => Some(c.to.clone()),
10143 _ => None,
10144 };
10145 if let Some(cast_type) = input_type {
10146 let is_time = matches!(cast_type, DataType::Time { .. });
10147 if is_time {
10148 let date_expr = Expression::Cast(Box::new(Cast {
10149 this: Expression::Literal(Box::new(
10150 crate::expressions::Literal::String(
10151 "1970-01-01".to_string(),
10152 ),
10153 )),
10154 to: DataType::Date,
10155 double_colon_syntax: false,
10156 trailing_comments: vec![],
10157 format: None,
10158 default: None,
10159 inferred_type: None,
10160 }));
10161 let add_expr =
10162 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
10163 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
10164 this: add_expr,
10165 unit: d.unit,
10166 }));
10167 Ok(Expression::Cast(Box::new(Cast {
10168 this: inner,
10169 to: cast_type,
10170 double_colon_syntax: false,
10171 trailing_comments: vec![],
10172 format: None,
10173 default: None,
10174 inferred_type: None,
10175 })))
10176 } else {
10177 let inner = Expression::DateTrunc(Box::new(*d));
10178 Ok(Expression::Cast(Box::new(Cast {
10179 this: inner,
10180 to: cast_type,
10181 double_colon_syntax: false,
10182 trailing_comments: vec![],
10183 format: None,
10184 default: None,
10185 inferred_type: None,
10186 })))
10187 }
10188 } else {
10189 Ok(Expression::DateTrunc(d))
10190 }
10191 }
10192 Expression::Function(f) if f.args.len() == 2 => {
10193 // Function-based DATE_TRUNC(unit, expr)
10194 let input_type = match &f.args[1] {
10195 Expression::Cast(c) => Some(c.to.clone()),
10196 _ => None,
10197 };
10198 if let Some(cast_type) = input_type {
10199 let is_time = matches!(cast_type, DataType::Time { .. });
10200 if is_time {
10201 let date_expr = Expression::Cast(Box::new(Cast {
10202 this: Expression::Literal(Box::new(
10203 crate::expressions::Literal::String(
10204 "1970-01-01".to_string(),
10205 ),
10206 )),
10207 to: DataType::Date,
10208 double_colon_syntax: false,
10209 trailing_comments: vec![],
10210 format: None,
10211 default: None,
10212 inferred_type: None,
10213 }));
10214 let mut args = f.args;
10215 let unit_arg = args.remove(0);
10216 let time_expr = args.remove(0);
10217 let add_expr = Expression::Add(Box::new(BinaryOp::new(
10218 date_expr, time_expr,
10219 )));
10220 let inner = Expression::Function(Box::new(Function::new(
10221 "DATE_TRUNC".to_string(),
10222 vec![unit_arg, add_expr],
10223 )));
10224 Ok(Expression::Cast(Box::new(Cast {
10225 this: inner,
10226 to: cast_type,
10227 double_colon_syntax: false,
10228 trailing_comments: vec![],
10229 format: None,
10230 default: None,
10231 inferred_type: None,
10232 })))
10233 } else {
10234 // Wrap the function in CAST
10235 Ok(Expression::Cast(Box::new(Cast {
10236 this: Expression::Function(f),
10237 to: cast_type,
10238 double_colon_syntax: false,
10239 trailing_comments: vec![],
10240 format: None,
10241 default: None,
10242 inferred_type: None,
10243 })))
10244 }
10245 } else {
10246 Ok(Expression::Function(f))
10247 }
10248 }
10249 other => Ok(other),
10250 }
10251 }
10252
10253 Action::RegexpReplaceSnowflakeToDuckDB => {
10254 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
10255 if let Expression::Function(f) = e {
10256 let mut args = f.args;
10257 let subject = args.remove(0);
10258 let pattern = args.remove(0);
10259 let replacement = args.remove(0);
10260 Ok(Expression::Function(Box::new(Function::new(
10261 "REGEXP_REPLACE".to_string(),
10262 vec![
10263 subject,
10264 pattern,
10265 replacement,
10266 Expression::Literal(Box::new(crate::expressions::Literal::String(
10267 "g".to_string(),
10268 ))),
10269 ],
10270 ))))
10271 } else {
10272 Ok(e)
10273 }
10274 }
10275
10276 Action::RegexpReplacePositionSnowflakeToDuckDB => {
10277 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
10278 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
10279 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
10280 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
10281 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
10282 if let Expression::Function(f) = e {
10283 let mut args = f.args;
10284 let subject = args.remove(0);
10285 let pattern = args.remove(0);
10286 let replacement = args.remove(0);
10287 let position = args.remove(0);
10288 let occurrence = args.remove(0);
10289
10290 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10291 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10292 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10293
10294 if is_pos_1 && is_occ_1 {
10295 // REGEXP_REPLACE(s, p, r) - single replace, no flags
10296 Ok(Expression::Function(Box::new(Function::new(
10297 "REGEXP_REPLACE".to_string(),
10298 vec![subject, pattern, replacement],
10299 ))))
10300 } else if is_pos_1 && is_occ_0 {
10301 // REGEXP_REPLACE(s, p, r, 'g') - global replace
10302 Ok(Expression::Function(Box::new(Function::new(
10303 "REGEXP_REPLACE".to_string(),
10304 vec![
10305 subject,
10306 pattern,
10307 replacement,
10308 Expression::Literal(Box::new(Literal::String("g".to_string()))),
10309 ],
10310 ))))
10311 } else {
10312 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
10313 // Pre-compute pos-1 when position is a numeric literal
10314 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
10315 if let Literal::Number(ref n) = lit.as_ref() {
10316 if let Ok(val) = n.parse::<i64>() {
10317 Expression::number(val - 1)
10318 } else {
10319 Expression::Sub(Box::new(BinaryOp::new(
10320 position.clone(),
10321 Expression::number(1),
10322 )))
10323 }
10324 } else {
10325 position.clone()
10326 }
10327 } else {
10328 Expression::Sub(Box::new(BinaryOp::new(
10329 position.clone(),
10330 Expression::number(1),
10331 )))
10332 };
10333 let prefix = Expression::Function(Box::new(Function::new(
10334 "SUBSTRING".to_string(),
10335 vec![subject.clone(), Expression::number(1), pos_minus_1],
10336 )));
10337 let suffix_subject = Expression::Function(Box::new(Function::new(
10338 "SUBSTRING".to_string(),
10339 vec![subject, position],
10340 )));
10341 let mut replace_args = vec![suffix_subject, pattern, replacement];
10342 if is_occ_0 {
10343 replace_args.push(Expression::Literal(Box::new(Literal::String(
10344 "g".to_string(),
10345 ))));
10346 }
10347 let replace_expr = Expression::Function(Box::new(Function::new(
10348 "REGEXP_REPLACE".to_string(),
10349 replace_args,
10350 )));
10351 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10352 this: Box::new(prefix),
10353 expression: Box::new(replace_expr),
10354 safe: None,
10355 })))
10356 }
10357 } else {
10358 Ok(e)
10359 }
10360 }
10361
10362 Action::RegexpSubstrSnowflakeToDuckDB => {
10363 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
10364 if let Expression::Function(f) = e {
10365 let mut args = f.args;
10366 let arg_count = args.len();
10367 match arg_count {
10368 // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
10369 0..=2 => Ok(Expression::Function(Box::new(Function::new(
10370 "REGEXP_EXTRACT".to_string(),
10371 args,
10372 )))),
10373 // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
10374 3 => {
10375 let subject = args.remove(0);
10376 let pattern = args.remove(0);
10377 let position = args.remove(0);
10378 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10379 if is_pos_1 {
10380 Ok(Expression::Function(Box::new(Function::new(
10381 "REGEXP_EXTRACT".to_string(),
10382 vec![subject, pattern],
10383 ))))
10384 } else {
10385 let substring_expr =
10386 Expression::Function(Box::new(Function::new(
10387 "SUBSTRING".to_string(),
10388 vec![subject, position],
10389 )));
10390 let nullif_expr =
10391 Expression::Function(Box::new(Function::new(
10392 "NULLIF".to_string(),
10393 vec![
10394 substring_expr,
10395 Expression::Literal(Box::new(Literal::String(
10396 String::new(),
10397 ))),
10398 ],
10399 )));
10400 Ok(Expression::Function(Box::new(Function::new(
10401 "REGEXP_EXTRACT".to_string(),
10402 vec![nullif_expr, pattern],
10403 ))))
10404 }
10405 }
10406 // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
10407 4 => {
10408 let subject = args.remove(0);
10409 let pattern = args.remove(0);
10410 let position = args.remove(0);
10411 let occurrence = args.remove(0);
10412 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10413 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10414
10415 let effective_subject = if is_pos_1 {
10416 subject
10417 } else {
10418 let substring_expr =
10419 Expression::Function(Box::new(Function::new(
10420 "SUBSTRING".to_string(),
10421 vec![subject, position],
10422 )));
10423 Expression::Function(Box::new(Function::new(
10424 "NULLIF".to_string(),
10425 vec![
10426 substring_expr,
10427 Expression::Literal(Box::new(Literal::String(
10428 String::new(),
10429 ))),
10430 ],
10431 )))
10432 };
10433
10434 if is_occ_1 {
10435 Ok(Expression::Function(Box::new(Function::new(
10436 "REGEXP_EXTRACT".to_string(),
10437 vec![effective_subject, pattern],
10438 ))))
10439 } else {
10440 // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
10441 let extract_all =
10442 Expression::Function(Box::new(Function::new(
10443 "REGEXP_EXTRACT_ALL".to_string(),
10444 vec![effective_subject, pattern],
10445 )));
10446 Ok(Expression::Function(Box::new(Function::new(
10447 "ARRAY_EXTRACT".to_string(),
10448 vec![extract_all, occurrence],
10449 ))))
10450 }
10451 }
10452 // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
10453 5 => {
10454 let subject = args.remove(0);
10455 let pattern = args.remove(0);
10456 let _position = args.remove(0);
10457 let _occurrence = args.remove(0);
10458 let _flags = args.remove(0);
10459 // Strip 'e' flag, convert to REGEXP_EXTRACT
10460 Ok(Expression::Function(Box::new(Function::new(
10461 "REGEXP_EXTRACT".to_string(),
10462 vec![subject, pattern],
10463 ))))
10464 }
10465 // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
10466 _ => {
10467 let subject = args.remove(0);
10468 let pattern = args.remove(0);
10469 let _position = args.remove(0);
10470 let _occurrence = args.remove(0);
10471 let _flags = args.remove(0);
10472 let group = args.remove(0);
10473 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10474 if is_group_0 {
10475 // Strip group=0 (default)
10476 Ok(Expression::Function(Box::new(Function::new(
10477 "REGEXP_EXTRACT".to_string(),
10478 vec![subject, pattern],
10479 ))))
10480 } else {
10481 Ok(Expression::Function(Box::new(Function::new(
10482 "REGEXP_EXTRACT".to_string(),
10483 vec![subject, pattern, group],
10484 ))))
10485 }
10486 }
10487 }
10488 } else {
10489 Ok(e)
10490 }
10491 }
10492
10493 Action::RegexpSubstrSnowflakeIdentity => {
10494 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
10495 // Strip trailing group=0
10496 if let Expression::Function(f) = e {
10497 let func_name = f.name.clone();
10498 let mut args = f.args;
10499 if args.len() == 6 {
10500 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10501 if is_group_0 {
10502 args.truncate(5);
10503 }
10504 }
10505 Ok(Expression::Function(Box::new(Function::new(
10506 func_name, args,
10507 ))))
10508 } else {
10509 Ok(e)
10510 }
10511 }
10512
10513 Action::RegexpSubstrAllSnowflakeToDuckDB => {
10514 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
10515 if let Expression::Function(f) = e {
10516 let mut args = f.args;
10517 let arg_count = args.len();
10518 match arg_count {
10519 // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
10520 0..=2 => Ok(Expression::Function(Box::new(Function::new(
10521 "REGEXP_EXTRACT_ALL".to_string(),
10522 args,
10523 )))),
10524 // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
10525 3 => {
10526 let subject = args.remove(0);
10527 let pattern = args.remove(0);
10528 let position = args.remove(0);
10529 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10530 if is_pos_1 {
10531 Ok(Expression::Function(Box::new(Function::new(
10532 "REGEXP_EXTRACT_ALL".to_string(),
10533 vec![subject, pattern],
10534 ))))
10535 } else {
10536 let substring_expr =
10537 Expression::Function(Box::new(Function::new(
10538 "SUBSTRING".to_string(),
10539 vec![subject, position],
10540 )));
10541 Ok(Expression::Function(Box::new(Function::new(
10542 "REGEXP_EXTRACT_ALL".to_string(),
10543 vec![substring_expr, pattern],
10544 ))))
10545 }
10546 }
10547 // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
10548 4 => {
10549 let subject = args.remove(0);
10550 let pattern = args.remove(0);
10551 let position = args.remove(0);
10552 let occurrence = args.remove(0);
10553 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10554 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10555
10556 let effective_subject = if is_pos_1 {
10557 subject
10558 } else {
10559 Expression::Function(Box::new(Function::new(
10560 "SUBSTRING".to_string(),
10561 vec![subject, position],
10562 )))
10563 };
10564
10565 if is_occ_1 {
10566 Ok(Expression::Function(Box::new(Function::new(
10567 "REGEXP_EXTRACT_ALL".to_string(),
10568 vec![effective_subject, pattern],
10569 ))))
10570 } else {
10571 // REGEXP_EXTRACT_ALL(s, p)[occ:]
10572 let extract_all =
10573 Expression::Function(Box::new(Function::new(
10574 "REGEXP_EXTRACT_ALL".to_string(),
10575 vec![effective_subject, pattern],
10576 )));
10577 Ok(Expression::ArraySlice(Box::new(
10578 crate::expressions::ArraySlice {
10579 this: extract_all,
10580 start: Some(occurrence),
10581 end: None,
10582 },
10583 )))
10584 }
10585 }
10586 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
10587 5 => {
10588 let subject = args.remove(0);
10589 let pattern = args.remove(0);
10590 let _position = args.remove(0);
10591 let _occurrence = args.remove(0);
10592 let _flags = args.remove(0);
10593 Ok(Expression::Function(Box::new(Function::new(
10594 "REGEXP_EXTRACT_ALL".to_string(),
10595 vec![subject, pattern],
10596 ))))
10597 }
10598 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
10599 _ => {
10600 let subject = args.remove(0);
10601 let pattern = args.remove(0);
10602 let _position = args.remove(0);
10603 let _occurrence = args.remove(0);
10604 let _flags = args.remove(0);
10605 let group = args.remove(0);
10606 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10607 if is_group_0 {
10608 Ok(Expression::Function(Box::new(Function::new(
10609 "REGEXP_EXTRACT_ALL".to_string(),
10610 vec![subject, pattern],
10611 ))))
10612 } else {
10613 Ok(Expression::Function(Box::new(Function::new(
10614 "REGEXP_EXTRACT_ALL".to_string(),
10615 vec![subject, pattern, group],
10616 ))))
10617 }
10618 }
10619 }
10620 } else {
10621 Ok(e)
10622 }
10623 }
10624
10625 Action::RegexpCountSnowflakeToDuckDB => {
10626 // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
10627 // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10628 if let Expression::Function(f) = e {
10629 let mut args = f.args;
10630 let arg_count = args.len();
10631 let subject = args.remove(0);
10632 let pattern = args.remove(0);
10633
10634 // Handle position arg
10635 let effective_subject = if arg_count >= 3 {
10636 let position = args.remove(0);
10637 Expression::Function(Box::new(Function::new(
10638 "SUBSTRING".to_string(),
10639 vec![subject, position],
10640 )))
10641 } else {
10642 subject
10643 };
10644
10645 // Handle flags arg -> embed as (?flags) prefix in pattern
10646 let effective_pattern = if arg_count >= 4 {
10647 let flags = args.remove(0);
10648 match &flags {
10649 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
10650 {
10651 let Literal::String(f_str) = lit.as_ref() else {
10652 unreachable!()
10653 };
10654 // Always use concatenation: '(?flags)' || pattern
10655 let prefix = Expression::Literal(Box::new(Literal::String(
10656 format!("(?{})", f_str),
10657 )));
10658 Expression::DPipe(Box::new(crate::expressions::DPipe {
10659 this: Box::new(prefix),
10660 expression: Box::new(pattern.clone()),
10661 safe: None,
10662 }))
10663 }
10664 _ => pattern.clone(),
10665 }
10666 } else {
10667 pattern.clone()
10668 };
10669
10670 // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10671 let extract_all = Expression::Function(Box::new(Function::new(
10672 "REGEXP_EXTRACT_ALL".to_string(),
10673 vec![effective_subject, effective_pattern.clone()],
10674 )));
10675 let length_expr =
10676 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10677 this: extract_all,
10678 original_name: None,
10679 inferred_type: None,
10680 }));
10681 let condition = Expression::Eq(Box::new(BinaryOp::new(
10682 effective_pattern,
10683 Expression::Literal(Box::new(Literal::String(String::new()))),
10684 )));
10685 Ok(Expression::Case(Box::new(Case {
10686 operand: None,
10687 whens: vec![(condition, Expression::number(0))],
10688 else_: Some(length_expr),
10689 comments: vec![],
10690 inferred_type: None,
10691 })))
10692 } else {
10693 Ok(e)
10694 }
10695 }
10696
10697 Action::RegexpInstrSnowflakeToDuckDB => {
10698 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
10699 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
10700 // WHEN p = '' THEN 0
10701 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10702 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
10703 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
10704 // + pos_offset
10705 // END
10706 if let Expression::Function(f) = e {
10707 let mut args = f.args;
10708 let subject = args.remove(0);
10709 let pattern = if !args.is_empty() {
10710 args.remove(0)
10711 } else {
10712 Expression::Literal(Box::new(Literal::String(String::new())))
10713 };
10714
10715 // Collect all original args for NULL checks
10716 let position = if !args.is_empty() {
10717 Some(args.remove(0))
10718 } else {
10719 None
10720 };
10721 let occurrence = if !args.is_empty() {
10722 Some(args.remove(0))
10723 } else {
10724 None
10725 };
10726 let option = if !args.is_empty() {
10727 Some(args.remove(0))
10728 } else {
10729 None
10730 };
10731 let flags = if !args.is_empty() {
10732 Some(args.remove(0))
10733 } else {
10734 None
10735 };
10736 let _group = if !args.is_empty() {
10737 Some(args.remove(0))
10738 } else {
10739 None
10740 };
10741
10742 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
10743 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
10744
10745 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
10746 let mut null_checks: Vec<Expression> = vec![
10747 Expression::Is(Box::new(BinaryOp::new(
10748 subject.clone(),
10749 Expression::Null(Null),
10750 ))),
10751 Expression::Is(Box::new(BinaryOp::new(
10752 pattern.clone(),
10753 Expression::Null(Null),
10754 ))),
10755 ];
10756 // Add NULL checks for all provided optional args
10757 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
10758 if let Some(arg) = opt_arg {
10759 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
10760 (*arg).clone(),
10761 Expression::Null(Null),
10762 ))));
10763 }
10764 }
10765 // Chain with OR
10766 let null_condition = null_checks
10767 .into_iter()
10768 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
10769 .unwrap();
10770
10771 // Effective subject (apply position offset)
10772 let effective_subject = if is_pos_1 {
10773 subject.clone()
10774 } else {
10775 let pos = position.clone().unwrap_or(Expression::number(1));
10776 Expression::Function(Box::new(Function::new(
10777 "SUBSTRING".to_string(),
10778 vec![subject.clone(), pos],
10779 )))
10780 };
10781
10782 // Effective pattern (apply flags if present)
10783 let effective_pattern = if let Some(ref fl) = flags {
10784 if let Expression::Literal(lit) = fl {
10785 if let Literal::String(f_str) = lit.as_ref() {
10786 if !f_str.is_empty() {
10787 let prefix = Expression::Literal(Box::new(
10788 Literal::String(format!("(?{})", f_str)),
10789 ));
10790 Expression::DPipe(Box::new(crate::expressions::DPipe {
10791 this: Box::new(prefix),
10792 expression: Box::new(pattern.clone()),
10793 safe: None,
10794 }))
10795 } else {
10796 pattern.clone()
10797 }
10798 } else {
10799 fl.clone()
10800 }
10801 } else {
10802 pattern.clone()
10803 }
10804 } else {
10805 pattern.clone()
10806 };
10807
10808 // WHEN pattern = '' THEN 0
10809 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
10810 effective_pattern.clone(),
10811 Expression::Literal(Box::new(Literal::String(String::new()))),
10812 )));
10813
10814 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10815 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
10816 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10817 this: Expression::Function(Box::new(Function::new(
10818 "REGEXP_EXTRACT_ALL".to_string(),
10819 vec![effective_subject.clone(), effective_pattern.clone()],
10820 ))),
10821 original_name: None,
10822 inferred_type: None,
10823 })),
10824 occurrence_expr.clone(),
10825 )));
10826
10827 // Helper: build LENGTH lambda for LIST_TRANSFORM
10828 let make_len_lambda = || {
10829 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
10830 parameters: vec![crate::expressions::Identifier::new("x")],
10831 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
10832 this: Expression::Identifier(
10833 crate::expressions::Identifier::new("x"),
10834 ),
10835 original_name: None,
10836 inferred_type: None,
10837 })),
10838 colon: false,
10839 parameter_types: vec![],
10840 }))
10841 };
10842
10843 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
10844 let split_sliced =
10845 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10846 this: Expression::Function(Box::new(Function::new(
10847 "STRING_SPLIT_REGEX".to_string(),
10848 vec![effective_subject.clone(), effective_pattern.clone()],
10849 ))),
10850 start: Some(Expression::number(1)),
10851 end: Some(occurrence_expr.clone()),
10852 }));
10853 let split_sum = Expression::Function(Box::new(Function::new(
10854 "COALESCE".to_string(),
10855 vec![
10856 Expression::Function(Box::new(Function::new(
10857 "LIST_SUM".to_string(),
10858 vec![Expression::Function(Box::new(Function::new(
10859 "LIST_TRANSFORM".to_string(),
10860 vec![split_sliced, make_len_lambda()],
10861 )))],
10862 ))),
10863 Expression::number(0),
10864 ],
10865 )));
10866
10867 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
10868 let extract_sliced =
10869 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10870 this: Expression::Function(Box::new(Function::new(
10871 "REGEXP_EXTRACT_ALL".to_string(),
10872 vec![effective_subject.clone(), effective_pattern.clone()],
10873 ))),
10874 start: Some(Expression::number(1)),
10875 end: Some(Expression::Sub(Box::new(BinaryOp::new(
10876 occurrence_expr.clone(),
10877 Expression::number(1),
10878 )))),
10879 }));
10880 let extract_sum = Expression::Function(Box::new(Function::new(
10881 "COALESCE".to_string(),
10882 vec![
10883 Expression::Function(Box::new(Function::new(
10884 "LIST_SUM".to_string(),
10885 vec![Expression::Function(Box::new(Function::new(
10886 "LIST_TRANSFORM".to_string(),
10887 vec![extract_sliced, make_len_lambda()],
10888 )))],
10889 ))),
10890 Expression::number(0),
10891 ],
10892 )));
10893
10894 // Position offset: pos - 1 when pos > 1, else 0
10895 let pos_offset: Expression = if !is_pos_1 {
10896 let pos = position.clone().unwrap_or(Expression::number(1));
10897 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
10898 } else {
10899 Expression::number(0)
10900 };
10901
10902 // ELSE: 1 + split_sum + extract_sum + pos_offset
10903 let else_expr = Expression::Add(Box::new(BinaryOp::new(
10904 Expression::Add(Box::new(BinaryOp::new(
10905 Expression::Add(Box::new(BinaryOp::new(
10906 Expression::number(1),
10907 split_sum,
10908 ))),
10909 extract_sum,
10910 ))),
10911 pos_offset,
10912 )));
10913
10914 Ok(Expression::Case(Box::new(Case {
10915 operand: None,
10916 whens: vec![
10917 (null_condition, Expression::Null(Null)),
10918 (empty_pattern_check, Expression::number(0)),
10919 (match_count_check, Expression::number(0)),
10920 ],
10921 else_: Some(else_expr),
10922 comments: vec![],
10923 inferred_type: None,
10924 })))
10925 } else {
10926 Ok(e)
10927 }
10928 }
10929
10930 Action::RlikeSnowflakeToDuckDB => {
10931 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
10932 // Both do full-string matching, so no anchoring needed
10933 let (subject, pattern, flags) = match e {
10934 Expression::RegexpLike(ref rl) => {
10935 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
10936 }
10937 Expression::Function(ref f) if f.args.len() >= 2 => {
10938 let s = f.args[0].clone();
10939 let p = f.args[1].clone();
10940 let fl = f.args.get(2).cloned();
10941 (s, p, fl)
10942 }
10943 _ => return Ok(e),
10944 };
10945
10946 let mut result_args = vec![subject, pattern];
10947 if let Some(fl) = flags {
10948 result_args.push(fl);
10949 }
10950 Ok(Expression::Function(Box::new(Function::new(
10951 "REGEXP_FULL_MATCH".to_string(),
10952 result_args,
10953 ))))
10954 }
10955
10956 Action::RegexpExtractAllToSnowflake => {
10957 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
10958 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
10959 if let Expression::Function(f) = e {
10960 let mut args = f.args;
10961 if args.len() >= 2 {
10962 let str_expr = args.remove(0);
10963 let pattern = args.remove(0);
10964
10965 let has_groups = match &pattern {
10966 Expression::Literal(lit)
10967 if matches!(lit.as_ref(), Literal::String(_)) =>
10968 {
10969 let Literal::String(s) = lit.as_ref() else {
10970 unreachable!()
10971 };
10972 s.contains('(') && s.contains(')')
10973 }
10974 _ => false,
10975 };
10976
10977 if has_groups {
10978 Ok(Expression::Function(Box::new(Function::new(
10979 "REGEXP_SUBSTR_ALL".to_string(),
10980 vec![
10981 str_expr,
10982 pattern,
10983 Expression::number(1),
10984 Expression::number(1),
10985 Expression::Literal(Box::new(Literal::String(
10986 "c".to_string(),
10987 ))),
10988 Expression::number(1),
10989 ],
10990 ))))
10991 } else {
10992 Ok(Expression::Function(Box::new(Function::new(
10993 "REGEXP_SUBSTR_ALL".to_string(),
10994 vec![str_expr, pattern],
10995 ))))
10996 }
10997 } else {
10998 Ok(Expression::Function(Box::new(Function::new(
10999 "REGEXP_SUBSTR_ALL".to_string(),
11000 args,
11001 ))))
11002 }
11003 } else {
11004 Ok(e)
11005 }
11006 }
11007
11008 Action::SetToVariable => {
11009 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
11010 if let Expression::SetStatement(mut s) = e {
11011 for item in &mut s.items {
11012 if item.kind.is_none() {
11013 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
11014 let already_variable = match &item.name {
11015 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
11016 _ => false,
11017 };
11018 if already_variable {
11019 // Extract the actual name and set kind
11020 if let Expression::Identifier(ref mut id) = item.name {
11021 let actual_name = id.name["VARIABLE ".len()..].to_string();
11022 id.name = actual_name;
11023 }
11024 }
11025 item.kind = Some("VARIABLE".to_string());
11026 }
11027 }
11028 Ok(Expression::SetStatement(s))
11029 } else {
11030 Ok(e)
11031 }
11032 }
11033
11034 Action::ConvertTimezoneToExpr => {
11035 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
11036 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
11037 if let Expression::Function(f) = e {
11038 if f.args.len() == 2 {
11039 let mut args = f.args;
11040 let target_tz = args.remove(0);
11041 let timestamp = args.remove(0);
11042 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
11043 source_tz: None,
11044 target_tz: Some(Box::new(target_tz)),
11045 timestamp: Some(Box::new(timestamp)),
11046 options: vec![],
11047 })))
11048 } else if f.args.len() == 3 {
11049 let mut args = f.args;
11050 let source_tz = args.remove(0);
11051 let target_tz = args.remove(0);
11052 let timestamp = args.remove(0);
11053 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
11054 source_tz: Some(Box::new(source_tz)),
11055 target_tz: Some(Box::new(target_tz)),
11056 timestamp: Some(Box::new(timestamp)),
11057 options: vec![],
11058 })))
11059 } else {
11060 Ok(Expression::Function(f))
11061 }
11062 } else {
11063 Ok(e)
11064 }
11065 }
11066
11067 Action::BigQueryCastType => {
11068 // Convert BigQuery types to standard SQL types
11069 if let Expression::DataType(dt) = e {
11070 match dt {
11071 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
11072 Ok(Expression::DataType(DataType::BigInt { length: None }))
11073 }
11074 DataType::Custom { ref name }
11075 if name.eq_ignore_ascii_case("FLOAT64") =>
11076 {
11077 Ok(Expression::DataType(DataType::Double {
11078 precision: None,
11079 scale: None,
11080 }))
11081 }
11082 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
11083 Ok(Expression::DataType(DataType::Boolean))
11084 }
11085 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
11086 Ok(Expression::DataType(DataType::VarBinary { length: None }))
11087 }
11088 DataType::Custom { ref name }
11089 if name.eq_ignore_ascii_case("NUMERIC") =>
11090 {
11091 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
11092 // default precision (18, 3) being added to bare DECIMAL
11093 if matches!(target, DialectType::DuckDB) {
11094 Ok(Expression::DataType(DataType::Custom {
11095 name: "DECIMAL".to_string(),
11096 }))
11097 } else {
11098 Ok(Expression::DataType(DataType::Decimal {
11099 precision: None,
11100 scale: None,
11101 }))
11102 }
11103 }
11104 DataType::Custom { ref name }
11105 if name.eq_ignore_ascii_case("STRING") =>
11106 {
11107 Ok(Expression::DataType(DataType::String { length: None }))
11108 }
11109 DataType::Custom { ref name }
11110 if name.eq_ignore_ascii_case("DATETIME") =>
11111 {
11112 Ok(Expression::DataType(DataType::Timestamp {
11113 precision: None,
11114 timezone: false,
11115 }))
11116 }
11117 _ => Ok(Expression::DataType(dt)),
11118 }
11119 } else {
11120 Ok(e)
11121 }
11122 }
11123
11124 Action::BigQuerySafeDivide => {
11125 // Convert SafeDivide expression to IF/CASE form for most targets
11126 if let Expression::SafeDivide(sd) = e {
11127 let x = *sd.this;
11128 let y = *sd.expression;
11129 // Wrap x and y in parens if they're complex expressions
11130 let y_ref = match &y {
11131 Expression::Column(_)
11132 | Expression::Literal(_)
11133 | Expression::Identifier(_) => y.clone(),
11134 _ => Expression::Paren(Box::new(Paren {
11135 this: y.clone(),
11136 trailing_comments: vec![],
11137 })),
11138 };
11139 let x_ref = match &x {
11140 Expression::Column(_)
11141 | Expression::Literal(_)
11142 | Expression::Identifier(_) => x.clone(),
11143 _ => Expression::Paren(Box::new(Paren {
11144 this: x.clone(),
11145 trailing_comments: vec![],
11146 })),
11147 };
11148 let condition = Expression::Neq(Box::new(BinaryOp::new(
11149 y_ref.clone(),
11150 Expression::number(0),
11151 )));
11152 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
11153
11154 if matches!(target, DialectType::Spark | DialectType::Databricks) {
11155 Ok(Expression::Function(Box::new(Function::new(
11156 "TRY_DIVIDE".to_string(),
11157 vec![x, y],
11158 ))))
11159 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
11160 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
11161 let cast_x = Expression::Cast(Box::new(Cast {
11162 this: match &x {
11163 Expression::Column(_)
11164 | Expression::Literal(_)
11165 | Expression::Identifier(_) => x,
11166 _ => Expression::Paren(Box::new(Paren {
11167 this: x,
11168 trailing_comments: vec![],
11169 })),
11170 },
11171 to: DataType::Double {
11172 precision: None,
11173 scale: None,
11174 },
11175 trailing_comments: vec![],
11176 double_colon_syntax: false,
11177 format: None,
11178 default: None,
11179 inferred_type: None,
11180 }));
11181 let cast_div = Expression::Div(Box::new(BinaryOp::new(
11182 cast_x,
11183 match &y {
11184 Expression::Column(_)
11185 | Expression::Literal(_)
11186 | Expression::Identifier(_) => y,
11187 _ => Expression::Paren(Box::new(Paren {
11188 this: y,
11189 trailing_comments: vec![],
11190 })),
11191 },
11192 )));
11193 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11194 condition,
11195 true_value: cast_div,
11196 false_value: Some(Expression::Null(Null)),
11197 original_name: None,
11198 inferred_type: None,
11199 })))
11200 } else if matches!(target, DialectType::PostgreSQL) {
11201 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
11202 let cast_x = Expression::Cast(Box::new(Cast {
11203 this: match &x {
11204 Expression::Column(_)
11205 | Expression::Literal(_)
11206 | Expression::Identifier(_) => x,
11207 _ => Expression::Paren(Box::new(Paren {
11208 this: x,
11209 trailing_comments: vec![],
11210 })),
11211 },
11212 to: DataType::Custom {
11213 name: "DOUBLE PRECISION".to_string(),
11214 },
11215 trailing_comments: vec![],
11216 double_colon_syntax: false,
11217 format: None,
11218 default: None,
11219 inferred_type: None,
11220 }));
11221 let y_paren = match &y {
11222 Expression::Column(_)
11223 | Expression::Literal(_)
11224 | Expression::Identifier(_) => y,
11225 _ => Expression::Paren(Box::new(Paren {
11226 this: y,
11227 trailing_comments: vec![],
11228 })),
11229 };
11230 let cast_div =
11231 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
11232 Ok(Expression::Case(Box::new(Case {
11233 operand: None,
11234 whens: vec![(condition, cast_div)],
11235 else_: Some(Expression::Null(Null)),
11236 comments: Vec::new(),
11237 inferred_type: None,
11238 })))
11239 } else if matches!(target, DialectType::DuckDB) {
11240 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
11241 Ok(Expression::Case(Box::new(Case {
11242 operand: None,
11243 whens: vec![(condition, div_expr)],
11244 else_: Some(Expression::Null(Null)),
11245 comments: Vec::new(),
11246 inferred_type: None,
11247 })))
11248 } else if matches!(target, DialectType::Snowflake) {
11249 // Snowflake: IFF(y <> 0, x / y, NULL)
11250 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11251 condition,
11252 true_value: div_expr,
11253 false_value: Some(Expression::Null(Null)),
11254 original_name: Some("IFF".to_string()),
11255 inferred_type: None,
11256 })))
11257 } else {
11258 // All others: IF(y <> 0, x / y, NULL)
11259 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11260 condition,
11261 true_value: div_expr,
11262 false_value: Some(Expression::Null(Null)),
11263 original_name: None,
11264 inferred_type: None,
11265 })))
11266 }
11267 } else {
11268 Ok(e)
11269 }
11270 }
11271
11272 Action::BigQueryLastDayStripUnit => {
11273 if let Expression::LastDay(mut ld) = e {
11274 ld.unit = None; // Strip the unit (MONTH is default)
11275 match target {
11276 DialectType::PostgreSQL => {
11277 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
11278 let date_trunc = Expression::Function(Box::new(Function::new(
11279 "DATE_TRUNC".to_string(),
11280 vec![
11281 Expression::Literal(Box::new(
11282 crate::expressions::Literal::String(
11283 "MONTH".to_string(),
11284 ),
11285 )),
11286 ld.this.clone(),
11287 ],
11288 )));
11289 let plus_month =
11290 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
11291 date_trunc,
11292 Expression::Interval(Box::new(
11293 crate::expressions::Interval {
11294 this: Some(Expression::Literal(Box::new(
11295 crate::expressions::Literal::String(
11296 "1 MONTH".to_string(),
11297 ),
11298 ))),
11299 unit: None,
11300 },
11301 )),
11302 )));
11303 let minus_day =
11304 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
11305 plus_month,
11306 Expression::Interval(Box::new(
11307 crate::expressions::Interval {
11308 this: Some(Expression::Literal(Box::new(
11309 crate::expressions::Literal::String(
11310 "1 DAY".to_string(),
11311 ),
11312 ))),
11313 unit: None,
11314 },
11315 )),
11316 )));
11317 Ok(Expression::Cast(Box::new(Cast {
11318 this: minus_day,
11319 to: DataType::Date,
11320 trailing_comments: vec![],
11321 double_colon_syntax: false,
11322 format: None,
11323 default: None,
11324 inferred_type: None,
11325 })))
11326 }
11327 DialectType::Presto => {
11328 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
11329 Ok(Expression::Function(Box::new(Function::new(
11330 "LAST_DAY_OF_MONTH".to_string(),
11331 vec![ld.this],
11332 ))))
11333 }
11334 DialectType::ClickHouse => {
11335 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
11336 // Need to wrap the DATE type in Nullable
11337 let nullable_date = match ld.this {
11338 Expression::Cast(mut c) => {
11339 c.to = DataType::Nullable {
11340 inner: Box::new(DataType::Date),
11341 };
11342 Expression::Cast(c)
11343 }
11344 other => other,
11345 };
11346 ld.this = nullable_date;
11347 Ok(Expression::LastDay(ld))
11348 }
11349 _ => Ok(Expression::LastDay(ld)),
11350 }
11351 } else {
11352 Ok(e)
11353 }
11354 }
11355
Action::BigQueryCastFormat => {
    // Rewrites CAST / SAFE_CAST expressions that carry a FORMAT clause.
    //
    // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
    // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
    // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
    //
    // Targets other than BigQuery and DuckDB, and casts without a FORMAT
    // clause, get the expression back unchanged.

    // Copy out the operand, target type and format expression. `is_safe`
    // records whether the source node was SafeCast. Anything that is not a
    // (Safe)Cast with a format returns early from the enclosing function.
    let (this, to, format_expr, is_safe) = match e {
        Expression::Cast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            // The guard above guarantees `format` is Some, so unwrap cannot panic.
            c.format.as_ref().unwrap().as_ref().clone(),
            false,
        ),
        Expression::SafeCast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            // Same invariant as above: guarded by `c.format.is_some()`.
            c.format.as_ref().unwrap().as_ref().clone(),
            true,
        ),
        _ => return Ok(e),
    };
    // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
    if matches!(target, DialectType::BigQuery) {
        match &to {
            DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                return Ok(e);
            }
            _ => {}
        }
    }
    // Extract timezone from format if AT TIME ZONE is present
    let (actual_format_expr, timezone) = match &format_expr {
        Expression::AtTimeZone(ref atz) => {
            (atz.this.clone(), Some(atz.zone.clone()))
        }
        _ => (format_expr.clone(), None),
    };
    // NOTE(review): presumably converts the FORMAT-clause pattern into a
    // strftime-style format expression; the helper is defined elsewhere.
    let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
    match target {
        DialectType::BigQuery => {
            // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
            // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
            // Only DATE selects PARSE_DATE; every other target type, TIME
            // included, is routed to PARSE_TIMESTAMP.
            let func_name = match &to {
                DataType::Date => "PARSE_DATE",
                DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                DataType::Time { .. } => "PARSE_TIMESTAMP",
                _ => "PARSE_TIMESTAMP",
            };
            // Argument order is (format, value[, timezone]).
            let mut func_args = vec![strftime_fmt, this];
            if let Some(tz) = timezone {
                func_args.push(tz);
            }
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                func_args,
            ))))
        }
        DialectType::DuckDB => {
            // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
            // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
            // Note: the `timezone` extracted above is not used in this branch.
            let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
            let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
            // Argument order is (value, format), the reverse of the BigQuery call.
            let parse_call = Expression::Function(Box::new(Function::new(
                parse_fn_name.to_string(),
                vec![this, duck_fmt],
            )));
            // The outer cast is always a plain Cast with no FORMAT clause,
            // even when the source node was SafeCast.
            Ok(Expression::Cast(Box::new(Cast {
                this: parse_call,
                to,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        _ => Ok(e),
    }
}
11434
Action::BigQueryFunctionNormalize => {
    // Pure delegation: the expression and both dialects are handed to
    // `normalize_bigquery_function`, whose result becomes this arm's result.
    Self::normalize_bigquery_function(e, source, target)
}
11438
Action::BigQueryToHexBare => {
    // No-op: the expression is returned unchanged.
    // Not used anymore - handled directly in normalize_bigquery_function
    Ok(e)
}
11443
Action::BigQueryToHexLower => {
    // Simplifies LOWER(...) wrappers around hex conversions. Expressions
    // that are not a Lower node are returned unchanged.
    if let Expression::Lower(uf) = e {
        match uf.this {
            // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
            // The function name comparison is exact (case-sensitive).
            Expression::Function(f)
                if matches!(target, DialectType::BigQuery)
                    && f.name == "TO_HEX" =>
            {
                Ok(Expression::Function(f))
            }
            // LOWER(LOWER(HEX/TO_HEX(x))) patterns
            Expression::Lower(inner_uf) => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: extract TO_HEX
                    // Any function found inside the inner LOWER is rebuilt as
                    // TO_HEX(args); its original name is not inspected here.
                    if let Expression::Function(f) = inner_uf.this {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_HEX".to_string(),
                            f.args,
                        ))))
                    } else {
                        // Inner operand is not a function: drop only the outer LOWER.
                        Ok(Expression::Lower(inner_uf))
                    }
                } else {
                    // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                    Ok(Expression::Lower(inner_uf))
                }
            }
            // Anything else: rebuild LOWER(other) as a fresh node. The new
            // node's `original_name` and `inferred_type` are both None.
            other => {
                Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                    this: other,
                    original_name: None,
                    inferred_type: None,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
11483
11484 Action::BigQueryToHexUpper => {
11485 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
11486 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
11487 if let Expression::Upper(uf) = e {
11488 if let Expression::Lower(inner_uf) = uf.this {
11489 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
11490 if matches!(target, DialectType::BigQuery) {
11491 // Restore TO_HEX name in inner function
11492 if let Expression::Function(f) = inner_uf.this {
11493 let restored = Expression::Function(Box::new(Function::new(
11494 "TO_HEX".to_string(),
11495 f.args,
11496 )));
11497 Ok(Expression::Upper(Box::new(
11498 crate::expressions::UnaryFunc::new(restored),
11499 )))
11500 } else {
11501 Ok(Expression::Upper(inner_uf))
11502 }
11503 } else {
11504 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
11505 Ok(inner_uf.this)
11506 }
11507 } else {
11508 Ok(Expression::Upper(uf))
11509 }
11510 } else {
11511 Ok(e)
11512 }
11513 }
11514
11515 Action::BigQueryAnyValueHaving => {
11516 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
11517 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
11518 if let Expression::AnyValue(agg) = e {
11519 if let Some((having_expr, is_max)) = agg.having_max {
11520 let func_name = if is_max {
11521 "ARG_MAX_NULL"
11522 } else {
11523 "ARG_MIN_NULL"
11524 };
11525 Ok(Expression::Function(Box::new(Function::new(
11526 func_name.to_string(),
11527 vec![agg.this, *having_expr],
11528 ))))
11529 } else {
11530 Ok(Expression::AnyValue(agg))
11531 }
11532 } else {
11533 Ok(e)
11534 }
11535 }
11536
11537 Action::BigQueryApproxQuantiles => {
11538 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
11539 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
11540 if let Expression::AggregateFunction(agg) = e {
11541 if agg.args.len() >= 2 {
11542 let x_expr = agg.args[0].clone();
11543 let n_expr = &agg.args[1];
11544
11545 // Extract the numeric value from n_expr
11546 let n = match n_expr {
11547 Expression::Literal(lit)
11548 if matches!(
11549 lit.as_ref(),
11550 crate::expressions::Literal::Number(_)
11551 ) =>
11552 {
11553 let crate::expressions::Literal::Number(s) = lit.as_ref()
11554 else {
11555 unreachable!()
11556 };
11557 s.parse::<usize>().unwrap_or(2)
11558 }
11559 _ => 2,
11560 };
11561
11562 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
11563 let mut quantiles = Vec::new();
11564 for i in 0..=n {
11565 let q = i as f64 / n as f64;
11566 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
11567 if q == 0.0 {
11568 quantiles.push(Expression::number(0));
11569 } else if q == 1.0 {
11570 quantiles.push(Expression::number(1));
11571 } else {
11572 quantiles.push(Expression::Literal(Box::new(
11573 crate::expressions::Literal::Number(format!("{}", q)),
11574 )));
11575 }
11576 }
11577
11578 let array_expr =
11579 Expression::Array(Box::new(crate::expressions::Array {
11580 expressions: quantiles,
11581 }));
11582
11583 // Preserve DISTINCT modifier
11584 let mut new_func = Function::new(
11585 "APPROX_QUANTILE".to_string(),
11586 vec![x_expr, array_expr],
11587 );
11588 new_func.distinct = agg.distinct;
11589 Ok(Expression::Function(Box::new(new_func)))
11590 } else {
11591 Ok(Expression::AggregateFunction(agg))
11592 }
11593 } else {
11594 Ok(e)
11595 }
11596 }
11597
11598 Action::GenericFunctionNormalize => {
11599 // Helper closure to convert ARBITRARY to target-specific function
11600 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
11601 let name = match target {
11602 DialectType::ClickHouse => "any",
11603 DialectType::TSQL | DialectType::SQLite => "MAX",
11604 DialectType::Hive => "FIRST",
11605 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11606 "ARBITRARY"
11607 }
11608 _ => "ANY_VALUE",
11609 };
11610 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
11611 }
11612
11613 if let Expression::Function(f) = e {
11614 let name = f.name.to_ascii_uppercase();
11615 match name.as_str() {
11616 "ARBITRARY" if f.args.len() == 1 => {
11617 let arg = f.args.into_iter().next().unwrap();
11618 Ok(convert_arbitrary(arg, target))
11619 }
11620 "TO_NUMBER" if f.args.len() == 1 => {
11621 let arg = f.args.into_iter().next().unwrap();
11622 match target {
11623 DialectType::Oracle | DialectType::Snowflake => {
11624 Ok(Expression::Function(Box::new(Function::new(
11625 "TO_NUMBER".to_string(),
11626 vec![arg],
11627 ))))
11628 }
11629 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11630 this: arg,
11631 to: crate::expressions::DataType::Double {
11632 precision: None,
11633 scale: None,
11634 },
11635 double_colon_syntax: false,
11636 trailing_comments: Vec::new(),
11637 format: None,
11638 default: None,
11639 inferred_type: None,
11640 }))),
11641 }
11642 }
11643 "AGGREGATE" if f.args.len() >= 3 => match target {
11644 DialectType::DuckDB
11645 | DialectType::Hive
11646 | DialectType::Presto
11647 | DialectType::Trino => Ok(Expression::Function(Box::new(
11648 Function::new("REDUCE".to_string(), f.args),
11649 ))),
11650 _ => Ok(Expression::Function(f)),
11651 },
11652 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
11653 "REGEXP_MATCHES" if f.args.len() >= 2 => {
11654 if matches!(target, DialectType::DuckDB) {
11655 Ok(Expression::Function(f))
11656 } else {
11657 let mut args = f.args;
11658 let this = args.remove(0);
11659 let pattern = args.remove(0);
11660 let flags = if args.is_empty() {
11661 None
11662 } else {
11663 Some(args.remove(0))
11664 };
11665 Ok(Expression::RegexpLike(Box::new(
11666 crate::expressions::RegexpFunc {
11667 this,
11668 pattern,
11669 flags,
11670 },
11671 )))
11672 }
11673 }
11674 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
11675 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
11676 if matches!(target, DialectType::DuckDB) {
11677 Ok(Expression::Function(f))
11678 } else {
11679 let mut args = f.args;
11680 let this = args.remove(0);
11681 let pattern = args.remove(0);
11682 let flags = if args.is_empty() {
11683 None
11684 } else {
11685 Some(args.remove(0))
11686 };
11687 Ok(Expression::RegexpLike(Box::new(
11688 crate::expressions::RegexpFunc {
11689 this,
11690 pattern,
11691 flags,
11692 },
11693 )))
11694 }
11695 }
11696 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
11697 "STRUCT_EXTRACT" if f.args.len() == 2 => {
11698 let mut args = f.args;
11699 let this = args.remove(0);
11700 let field_expr = args.remove(0);
11701 // Extract string literal to get field name
11702 let field_name = match &field_expr {
11703 Expression::Literal(lit)
11704 if matches!(
11705 lit.as_ref(),
11706 crate::expressions::Literal::String(_)
11707 ) =>
11708 {
11709 let crate::expressions::Literal::String(s) = lit.as_ref()
11710 else {
11711 unreachable!()
11712 };
11713 s.clone()
11714 }
11715 Expression::Identifier(id) => id.name.clone(),
11716 _ => {
11717 return Ok(Expression::Function(Box::new(Function::new(
11718 "STRUCT_EXTRACT".to_string(),
11719 vec![this, field_expr],
11720 ))))
11721 }
11722 };
11723 Ok(Expression::StructExtract(Box::new(
11724 crate::expressions::StructExtractFunc {
11725 this,
11726 field: crate::expressions::Identifier::new(field_name),
11727 },
11728 )))
11729 }
11730 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
11731 "LIST_FILTER" if f.args.len() == 2 => {
11732 let name = match target {
11733 DialectType::DuckDB => "LIST_FILTER",
11734 _ => "FILTER",
11735 };
11736 Ok(Expression::Function(Box::new(Function::new(
11737 name.to_string(),
11738 f.args,
11739 ))))
11740 }
11741 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
11742 "LIST_TRANSFORM" if f.args.len() == 2 => {
11743 let name = match target {
11744 DialectType::DuckDB => "LIST_TRANSFORM",
11745 _ => "TRANSFORM",
11746 };
11747 Ok(Expression::Function(Box::new(Function::new(
11748 name.to_string(),
11749 f.args,
11750 ))))
11751 }
11752 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
11753 "LIST_SORT" if f.args.len() >= 1 => {
11754 let name = match target {
11755 DialectType::DuckDB => "LIST_SORT",
11756 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
11757 _ => "SORT_ARRAY",
11758 };
11759 Ok(Expression::Function(Box::new(Function::new(
11760 name.to_string(),
11761 f.args,
11762 ))))
11763 }
11764 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
11765 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
11766 match target {
11767 DialectType::DuckDB => Ok(Expression::Function(Box::new(
11768 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
11769 ))),
11770 DialectType::Spark
11771 | DialectType::Databricks
11772 | DialectType::Hive => {
11773 let mut args = f.args;
11774 args.push(Expression::Identifier(
11775 crate::expressions::Identifier::new("FALSE"),
11776 ));
11777 Ok(Expression::Function(Box::new(Function::new(
11778 "SORT_ARRAY".to_string(),
11779 args,
11780 ))))
11781 }
11782 DialectType::Presto
11783 | DialectType::Trino
11784 | DialectType::Athena => {
11785 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
11786 let arr = f.args.into_iter().next().unwrap();
11787 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
11788 parameters: vec![
11789 crate::expressions::Identifier::new("a"),
11790 crate::expressions::Identifier::new("b"),
11791 ],
11792 body: Expression::Case(Box::new(Case {
11793 operand: None,
11794 whens: vec![
11795 (
11796 Expression::Lt(Box::new(BinaryOp::new(
11797 Expression::Identifier(crate::expressions::Identifier::new("a")),
11798 Expression::Identifier(crate::expressions::Identifier::new("b")),
11799 ))),
11800 Expression::number(1),
11801 ),
11802 (
11803 Expression::Gt(Box::new(BinaryOp::new(
11804 Expression::Identifier(crate::expressions::Identifier::new("a")),
11805 Expression::Identifier(crate::expressions::Identifier::new("b")),
11806 ))),
11807 Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
11808 ),
11809 ],
11810 else_: Some(Expression::number(0)),
11811 comments: Vec::new(),
11812 inferred_type: None,
11813 })),
11814 colon: false,
11815 parameter_types: Vec::new(),
11816 }));
11817 Ok(Expression::Function(Box::new(Function::new(
11818 "ARRAY_SORT".to_string(),
11819 vec![arr, lambda],
11820 ))))
11821 }
11822 _ => Ok(Expression::Function(Box::new(Function::new(
11823 "LIST_REVERSE_SORT".to_string(),
11824 f.args,
11825 )))),
11826 }
11827 }
11828 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
11829 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
11830 let mut args = f.args;
11831 args.push(Expression::string(","));
11832 let name = match target {
11833 DialectType::DuckDB => "STR_SPLIT",
11834 DialectType::Presto | DialectType::Trino => "SPLIT",
11835 DialectType::Spark
11836 | DialectType::Databricks
11837 | DialectType::Hive => "SPLIT",
11838 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11839 DialectType::Redshift => "SPLIT_TO_ARRAY",
11840 _ => "SPLIT",
11841 };
11842 Ok(Expression::Function(Box::new(Function::new(
11843 name.to_string(),
11844 args,
11845 ))))
11846 }
11847 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
11848 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
11849 let name = match target {
11850 DialectType::DuckDB => "STR_SPLIT",
11851 DialectType::Presto | DialectType::Trino => "SPLIT",
11852 DialectType::Spark
11853 | DialectType::Databricks
11854 | DialectType::Hive => "SPLIT",
11855 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11856 DialectType::Redshift => "SPLIT_TO_ARRAY",
11857 _ => "SPLIT",
11858 };
11859 Ok(Expression::Function(Box::new(Function::new(
11860 name.to_string(),
11861 f.args,
11862 ))))
11863 }
11864 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
11865 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
11866 let name = match target {
11867 DialectType::DuckDB => "STR_SPLIT",
11868 DialectType::Presto | DialectType::Trino => "SPLIT",
11869 DialectType::Spark
11870 | DialectType::Databricks
11871 | DialectType::Hive => "SPLIT",
11872 DialectType::Doris | DialectType::StarRocks => {
11873 "SPLIT_BY_STRING"
11874 }
11875 DialectType::PostgreSQL | DialectType::Redshift => {
11876 "STRING_TO_ARRAY"
11877 }
11878 _ => "SPLIT",
11879 };
11880 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
11881 if matches!(
11882 target,
11883 DialectType::Spark
11884 | DialectType::Databricks
11885 | DialectType::Hive
11886 ) {
11887 let mut args = f.args;
11888 let x = args.remove(0);
11889 let sep = args.remove(0);
11890 // Wrap separator in CONCAT('\\Q', sep, '\\E')
11891 let escaped_sep =
11892 Expression::Function(Box::new(Function::new(
11893 "CONCAT".to_string(),
11894 vec![
11895 Expression::string("\\Q"),
11896 sep,
11897 Expression::string("\\E"),
11898 ],
11899 )));
11900 Ok(Expression::Function(Box::new(Function::new(
11901 name.to_string(),
11902 vec![x, escaped_sep],
11903 ))))
11904 } else {
11905 Ok(Expression::Function(Box::new(Function::new(
11906 name.to_string(),
11907 f.args,
11908 ))))
11909 }
11910 }
11911 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
11912 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
11913 let name = match target {
11914 DialectType::DuckDB => "STR_SPLIT_REGEX",
11915 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
11916 DialectType::Spark
11917 | DialectType::Databricks
11918 | DialectType::Hive => "SPLIT",
11919 _ => "REGEXP_SPLIT",
11920 };
11921 Ok(Expression::Function(Box::new(Function::new(
11922 name.to_string(),
11923 f.args,
11924 ))))
11925 }
11926 // SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
11927 "SPLIT"
11928 if f.args.len() == 2
11929 && matches!(source, DialectType::Snowflake)
11930 && matches!(target, DialectType::DuckDB) =>
11931 {
11932 let mut args = f.args;
11933 let str_arg = args.remove(0);
11934 let delim_arg = args.remove(0);
11935
11936 // STR_SPLIT(str, delim) as the base
11937 let base_func = Expression::Function(Box::new(Function::new(
11938 "STR_SPLIT".to_string(),
11939 vec![str_arg.clone(), delim_arg.clone()],
11940 )));
11941
11942 // [str] - array with single element
11943 let array_with_input =
11944 Expression::Array(Box::new(crate::expressions::Array {
11945 expressions: vec![str_arg],
11946 }));
11947
11948 // CASE
11949 // WHEN delim IS NULL THEN NULL
11950 // WHEN delim = '' THEN [str]
11951 // ELSE STR_SPLIT(str, delim)
11952 // END
11953 Ok(Expression::Case(Box::new(Case {
11954 operand: None,
11955 whens: vec![
11956 (
11957 Expression::Is(Box::new(BinaryOp {
11958 left: delim_arg.clone(),
11959 right: Expression::Null(Null),
11960 left_comments: vec![],
11961 operator_comments: vec![],
11962 trailing_comments: vec![],
11963 inferred_type: None,
11964 })),
11965 Expression::Null(Null),
11966 ),
11967 (
11968 Expression::Eq(Box::new(BinaryOp {
11969 left: delim_arg,
11970 right: Expression::string(""),
11971 left_comments: vec![],
11972 operator_comments: vec![],
11973 trailing_comments: vec![],
11974 inferred_type: None,
11975 })),
11976 array_with_input,
11977 ),
11978 ],
11979 else_: Some(base_func),
11980 comments: vec![],
11981 inferred_type: None,
11982 })))
11983 }
11984 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
11985 "SPLIT"
11986 if f.args.len() == 2
11987 && matches!(
11988 source,
11989 DialectType::Presto
11990 | DialectType::Trino
11991 | DialectType::Athena
11992 | DialectType::StarRocks
11993 | DialectType::Doris
11994 )
11995 && matches!(
11996 target,
11997 DialectType::Spark
11998 | DialectType::Databricks
11999 | DialectType::Hive
12000 ) =>
12001 {
12002 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
12003 let mut args = f.args;
12004 let x = args.remove(0);
12005 let sep = args.remove(0);
12006 let escaped_sep = Expression::Function(Box::new(Function::new(
12007 "CONCAT".to_string(),
12008 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
12009 )));
12010 Ok(Expression::Function(Box::new(Function::new(
12011 "SPLIT".to_string(),
12012 vec![x, escaped_sep],
12013 ))))
12014 }
12015 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
12016 // For ClickHouse target, preserve original name to maintain camelCase
12017 "SUBSTRINGINDEX" => {
12018 let name = if matches!(target, DialectType::ClickHouse) {
12019 f.name.clone()
12020 } else {
12021 "SUBSTRING_INDEX".to_string()
12022 };
12023 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
12024 }
12025 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
12026 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
12027 // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
12028 if name == "CARDINALITY"
12029 && matches!(source, DialectType::DuckDB)
12030 && matches!(target, DialectType::DuckDB)
12031 {
12032 return Ok(Expression::Function(f));
12033 }
12034 // Get the array argument (first arg, drop dimension args)
12035 let mut args = f.args;
12036 let arr = if args.is_empty() {
12037 return Ok(Expression::Function(Box::new(Function::new(
12038 name.to_string(),
12039 args,
12040 ))));
12041 } else {
12042 args.remove(0)
12043 };
12044 let name =
12045 match target {
12046 DialectType::Spark
12047 | DialectType::Databricks
12048 | DialectType::Hive => "SIZE",
12049 DialectType::Presto | DialectType::Trino => "CARDINALITY",
12050 DialectType::BigQuery => "ARRAY_LENGTH",
12051 DialectType::DuckDB => {
12052 // DuckDB: use ARRAY_LENGTH with all args
12053 let mut all_args = vec![arr];
12054 all_args.extend(args);
12055 return Ok(Expression::Function(Box::new(
12056 Function::new("ARRAY_LENGTH".to_string(), all_args),
12057 )));
12058 }
12059 DialectType::PostgreSQL | DialectType::Redshift => {
12060 // Keep ARRAY_LENGTH with dimension arg
12061 let mut all_args = vec![arr];
12062 all_args.extend(args);
12063 return Ok(Expression::Function(Box::new(
12064 Function::new("ARRAY_LENGTH".to_string(), all_args),
12065 )));
12066 }
12067 DialectType::ClickHouse => "LENGTH",
12068 _ => "ARRAY_LENGTH",
12069 };
12070 Ok(Expression::Function(Box::new(Function::new(
12071 name.to_string(),
12072 vec![arr],
12073 ))))
12074 }
12075 // TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
12076 "TO_VARIANT" if f.args.len() == 1 => match target {
12077 DialectType::DuckDB => {
12078 let arg = f.args.into_iter().next().unwrap();
12079 Ok(Expression::Cast(Box::new(Cast {
12080 this: arg,
12081 to: DataType::Custom {
12082 name: "VARIANT".to_string(),
12083 },
12084 double_colon_syntax: false,
12085 trailing_comments: Vec::new(),
12086 format: None,
12087 default: None,
12088 inferred_type: None,
12089 })))
12090 }
12091 _ => Ok(Expression::Function(f)),
12092 },
12093 // JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
12094 "JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
12095 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12096 Function::new("JSON_AGG".to_string(), f.args),
12097 ))),
12098 _ => Ok(Expression::Function(f)),
12099 },
12100 // JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
12101 "JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
12102 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12103 Function::new("JSON_OBJECT_AGG".to_string(), f.args),
12104 ))),
12105 _ => Ok(Expression::Function(f)),
12106 },
12107 // UNICODE(x) -> target-specific codepoint function
12108 "UNICODE" if f.args.len() == 1 => {
12109 match target {
12110 DialectType::SQLite | DialectType::DuckDB => {
12111 Ok(Expression::Function(Box::new(Function::new(
12112 "UNICODE".to_string(),
12113 f.args,
12114 ))))
12115 }
12116 DialectType::Oracle => {
12117 // ASCII(UNISTR(x))
12118 let inner = Expression::Function(Box::new(Function::new(
12119 "UNISTR".to_string(),
12120 f.args,
12121 )));
12122 Ok(Expression::Function(Box::new(Function::new(
12123 "ASCII".to_string(),
12124 vec![inner],
12125 ))))
12126 }
12127 DialectType::MySQL => {
12128 // ORD(CONVERT(x USING utf32))
12129 let arg = f.args.into_iter().next().unwrap();
12130 let convert_expr = Expression::ConvertToCharset(Box::new(
12131 crate::expressions::ConvertToCharset {
12132 this: Box::new(arg),
12133 dest: Some(Box::new(Expression::Identifier(
12134 crate::expressions::Identifier::new("utf32"),
12135 ))),
12136 source: None,
12137 },
12138 ));
12139 Ok(Expression::Function(Box::new(Function::new(
12140 "ORD".to_string(),
12141 vec![convert_expr],
12142 ))))
12143 }
12144 _ => Ok(Expression::Function(Box::new(Function::new(
12145 "ASCII".to_string(),
12146 f.args,
12147 )))),
12148 }
12149 }
12150 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
12151 "XOR" if f.args.len() >= 2 => {
12152 match target {
12153 DialectType::ClickHouse => {
12154 // ClickHouse: keep as xor() function with lowercase name
12155 Ok(Expression::Function(Box::new(Function::new(
12156 "xor".to_string(),
12157 f.args,
12158 ))))
12159 }
12160 DialectType::Presto | DialectType::Trino => {
12161 if f.args.len() == 2 {
12162 Ok(Expression::Function(Box::new(Function::new(
12163 "BITWISE_XOR".to_string(),
12164 f.args,
12165 ))))
12166 } else {
12167 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
12168 let mut args = f.args;
12169 let first = args.remove(0);
12170 let second = args.remove(0);
12171 let mut result =
12172 Expression::Function(Box::new(Function::new(
12173 "BITWISE_XOR".to_string(),
12174 vec![first, second],
12175 )));
12176 for arg in args {
12177 result =
12178 Expression::Function(Box::new(Function::new(
12179 "BITWISE_XOR".to_string(),
12180 vec![result, arg],
12181 )));
12182 }
12183 Ok(result)
12184 }
12185 }
12186 DialectType::MySQL
12187 | DialectType::SingleStore
12188 | DialectType::Doris
12189 | DialectType::StarRocks => {
12190 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
12191 let args = f.args;
12192 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
12193 this: None,
12194 expression: None,
12195 expressions: args,
12196 })))
12197 }
12198 DialectType::PostgreSQL | DialectType::Redshift => {
12199 // PostgreSQL: a # b (hash operator for XOR)
12200 let mut args = f.args;
12201 let first = args.remove(0);
12202 let second = args.remove(0);
12203 let mut result = Expression::BitwiseXor(Box::new(
12204 BinaryOp::new(first, second),
12205 ));
12206 for arg in args {
12207 result = Expression::BitwiseXor(Box::new(
12208 BinaryOp::new(result, arg),
12209 ));
12210 }
12211 Ok(result)
12212 }
12213 DialectType::DuckDB => {
12214 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
12215 Ok(Expression::Function(Box::new(Function::new(
12216 "XOR".to_string(),
12217 f.args,
12218 ))))
12219 }
12220 DialectType::BigQuery => {
12221 // BigQuery: a ^ b (caret operator for XOR)
12222 let mut args = f.args;
12223 let first = args.remove(0);
12224 let second = args.remove(0);
12225 let mut result = Expression::BitwiseXor(Box::new(
12226 BinaryOp::new(first, second),
12227 ));
12228 for arg in args {
12229 result = Expression::BitwiseXor(Box::new(
12230 BinaryOp::new(result, arg),
12231 ));
12232 }
12233 Ok(result)
12234 }
12235 _ => Ok(Expression::Function(Box::new(Function::new(
12236 "XOR".to_string(),
12237 f.args,
12238 )))),
12239 }
12240 }
12241 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
12242 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
12243 match target {
12244 DialectType::Spark
12245 | DialectType::Databricks
12246 | DialectType::Hive => {
12247 let mut args = f.args;
12248 args.push(Expression::Identifier(
12249 crate::expressions::Identifier::new("FALSE"),
12250 ));
12251 Ok(Expression::Function(Box::new(Function::new(
12252 "SORT_ARRAY".to_string(),
12253 args,
12254 ))))
12255 }
12256 DialectType::Presto
12257 | DialectType::Trino
12258 | DialectType::Athena => {
12259 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
12260 let arr = f.args.into_iter().next().unwrap();
12261 let lambda = Expression::Lambda(Box::new(
12262 crate::expressions::LambdaExpr {
12263 parameters: vec![
12264 Identifier::new("a"),
12265 Identifier::new("b"),
12266 ],
12267 colon: false,
12268 parameter_types: Vec::new(),
12269 body: Expression::Case(Box::new(Case {
12270 operand: None,
12271 whens: vec![
12272 (
12273 Expression::Lt(Box::new(
12274 BinaryOp::new(
12275 Expression::Identifier(
12276 Identifier::new("a"),
12277 ),
12278 Expression::Identifier(
12279 Identifier::new("b"),
12280 ),
12281 ),
12282 )),
12283 Expression::number(1),
12284 ),
12285 (
12286 Expression::Gt(Box::new(
12287 BinaryOp::new(
12288 Expression::Identifier(
12289 Identifier::new("a"),
12290 ),
12291 Expression::Identifier(
12292 Identifier::new("b"),
12293 ),
12294 ),
12295 )),
12296 Expression::Neg(Box::new(
12297 crate::expressions::UnaryOp {
12298 this: Expression::number(1),
12299 inferred_type: None,
12300 },
12301 )),
12302 ),
12303 ],
12304 else_: Some(Expression::number(0)),
12305 comments: Vec::new(),
12306 inferred_type: None,
12307 })),
12308 },
12309 ));
12310 Ok(Expression::Function(Box::new(Function::new(
12311 "ARRAY_SORT".to_string(),
12312 vec![arr, lambda],
12313 ))))
12314 }
12315 _ => Ok(Expression::Function(Box::new(Function::new(
12316 "ARRAY_REVERSE_SORT".to_string(),
12317 f.args,
12318 )))),
12319 }
12320 }
12321 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
12322 "ENCODE" if f.args.len() == 1 => match target {
12323 DialectType::Spark
12324 | DialectType::Databricks
12325 | DialectType::Hive => {
12326 let mut args = f.args;
12327 args.push(Expression::string("utf-8"));
12328 Ok(Expression::Function(Box::new(Function::new(
12329 "ENCODE".to_string(),
12330 args,
12331 ))))
12332 }
12333 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12334 Ok(Expression::Function(Box::new(Function::new(
12335 "TO_UTF8".to_string(),
12336 f.args,
12337 ))))
12338 }
12339 _ => Ok(Expression::Function(Box::new(Function::new(
12340 "ENCODE".to_string(),
12341 f.args,
12342 )))),
12343 },
12344 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
12345 "DECODE" if f.args.len() == 1 => match target {
12346 DialectType::Spark
12347 | DialectType::Databricks
12348 | DialectType::Hive => {
12349 let mut args = f.args;
12350 args.push(Expression::string("utf-8"));
12351 Ok(Expression::Function(Box::new(Function::new(
12352 "DECODE".to_string(),
12353 args,
12354 ))))
12355 }
12356 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12357 Ok(Expression::Function(Box::new(Function::new(
12358 "FROM_UTF8".to_string(),
12359 f.args,
12360 ))))
12361 }
12362 _ => Ok(Expression::Function(Box::new(Function::new(
12363 "DECODE".to_string(),
12364 f.args,
12365 )))),
12366 },
12367 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Databricks/Hive, APPROX_PERCENTILE(x, p) for Presto/Trino, PERCENTILE_CONT(x, p) for BigQuery
12368 "QUANTILE" if f.args.len() == 2 => {
12369 let name = match target {
12370 DialectType::Spark
12371 | DialectType::Databricks
12372 | DialectType::Hive => "PERCENTILE",
12373 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
12374 DialectType::BigQuery => "PERCENTILE_CONT",
12375 _ => "QUANTILE",
12376 };
12377 Ok(Expression::Function(Box::new(Function::new(
12378 name.to_string(),
12379 f.args,
12380 ))))
12381 }
12382 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Redshift/Snowflake
12383 "QUANTILE_CONT" if f.args.len() == 2 => {
12384 let mut args = f.args;
12385 let column = args.remove(0);
12386 let quantile = args.remove(0);
12387 match target {
12388 DialectType::DuckDB => {
12389 Ok(Expression::Function(Box::new(Function::new(
12390 "QUANTILE_CONT".to_string(),
12391 vec![column, quantile],
12392 ))))
12393 }
12394 DialectType::PostgreSQL
12395 | DialectType::Redshift
12396 | DialectType::Snowflake => {
12397 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
12398 let inner = Expression::PercentileCont(Box::new(
12399 crate::expressions::PercentileFunc {
12400 this: column.clone(),
12401 percentile: quantile,
12402 order_by: None,
12403 filter: None,
12404 },
12405 ));
12406 Ok(Expression::WithinGroup(Box::new(
12407 crate::expressions::WithinGroup {
12408 this: inner,
12409 order_by: vec![crate::expressions::Ordered {
12410 this: column,
12411 desc: false,
12412 nulls_first: None,
12413 explicit_asc: false,
12414 with_fill: None,
12415 }],
12416 },
12417 )))
12418 }
12419 _ => Ok(Expression::Function(Box::new(Function::new(
12420 "QUANTILE_CONT".to_string(),
12421 vec![column, quantile],
12422 )))),
12423 }
12424 }
12425 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Redshift/Snowflake
12426 "QUANTILE_DISC" if f.args.len() == 2 => {
12427 let mut args = f.args;
12428 let column = args.remove(0);
12429 let quantile = args.remove(0);
12430 match target {
12431 DialectType::DuckDB => {
12432 Ok(Expression::Function(Box::new(Function::new(
12433 "QUANTILE_DISC".to_string(),
12434 vec![column, quantile],
12435 ))))
12436 }
12437 DialectType::PostgreSQL
12438 | DialectType::Redshift
12439 | DialectType::Snowflake => {
12440 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
12441 let inner = Expression::PercentileDisc(Box::new(
12442 crate::expressions::PercentileFunc {
12443 this: column.clone(),
12444 percentile: quantile,
12445 order_by: None,
12446 filter: None,
12447 },
12448 ));
12449 Ok(Expression::WithinGroup(Box::new(
12450 crate::expressions::WithinGroup {
12451 this: inner,
12452 order_by: vec![crate::expressions::Ordered {
12453 this: column,
12454 desc: false,
12455 nulls_first: None,
12456 explicit_asc: false,
12457 with_fill: None,
12458 }],
12459 },
12460 )))
12461 }
12462 _ => Ok(Expression::Function(Box::new(Function::new(
12463 "QUANTILE_DISC".to_string(),
12464 vec![column, quantile],
12465 )))),
12466 }
12467 }
12468 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
12469 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
12470 let name = match target {
12471 DialectType::Presto
12472 | DialectType::Trino
12473 | DialectType::Athena => "APPROX_PERCENTILE",
12474 DialectType::Spark
12475 | DialectType::Databricks
12476 | DialectType::Hive => "PERCENTILE_APPROX",
12477 DialectType::DuckDB => "APPROX_QUANTILE",
12478 DialectType::PostgreSQL | DialectType::Redshift => {
12479 "PERCENTILE_CONT"
12480 }
12481 _ => &f.name,
12482 };
12483 Ok(Expression::Function(Box::new(Function::new(
12484 name.to_string(),
12485 f.args,
12486 ))))
12487 }
12488 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Databricks/Hive, TO_UNIXTIME(x) for Presto/Trino
12489 "EPOCH" if f.args.len() == 1 => {
12490 let name = match target {
12491 DialectType::Spark
12492 | DialectType::Databricks
12493 | DialectType::Hive => "UNIX_TIMESTAMP",
12494 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
12495 _ => "EPOCH",
12496 };
12497 Ok(Expression::Function(Box::new(Function::new(
12498 name.to_string(),
12499 f.args,
12500 ))))
12501 }
12502 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
12503 "EPOCH_MS" if f.args.len() == 1 => {
12504 match target {
12505 DialectType::Spark | DialectType::Databricks => {
12506 Ok(Expression::Function(Box::new(Function::new(
12507 "TIMESTAMP_MILLIS".to_string(),
12508 f.args,
12509 ))))
12510 }
12511 DialectType::Hive => {
12512 // Hive: FROM_UNIXTIME(x / 1000)
12513 let arg = f.args.into_iter().next().unwrap();
12514 let div_expr = Expression::Div(Box::new(
12515 crate::expressions::BinaryOp::new(
12516 arg,
12517 Expression::number(1000),
12518 ),
12519 ));
12520 Ok(Expression::Function(Box::new(Function::new(
12521 "FROM_UNIXTIME".to_string(),
12522 vec![div_expr],
12523 ))))
12524 }
12525 DialectType::Presto | DialectType::Trino => {
12526 Ok(Expression::Function(Box::new(Function::new(
12527 "FROM_UNIXTIME".to_string(),
12528 vec![Expression::Div(Box::new(
12529 crate::expressions::BinaryOp::new(
12530 f.args.into_iter().next().unwrap(),
12531 Expression::number(1000),
12532 ),
12533 ))],
12534 ))))
12535 }
12536 _ => Ok(Expression::Function(Box::new(Function::new(
12537 "EPOCH_MS".to_string(),
12538 f.args,
12539 )))),
12540 }
12541 }
12542 // HASHBYTES('algorithm', x) -> target-specific hash function
12543 "HASHBYTES" if f.args.len() == 2 => {
12544 // Keep HASHBYTES as-is for TSQL target
12545 if matches!(target, DialectType::TSQL) {
12546 return Ok(Expression::Function(f));
12547 }
12548 let algo_expr = &f.args[0];
12549 let algo = match algo_expr {
12550 Expression::Literal(lit)
12551 if matches!(
12552 lit.as_ref(),
12553 crate::expressions::Literal::String(_)
12554 ) =>
12555 {
12556 let crate::expressions::Literal::String(s) = lit.as_ref()
12557 else {
12558 unreachable!()
12559 };
12560 s.to_ascii_uppercase()
12561 }
12562 _ => return Ok(Expression::Function(f)),
12563 };
12564 let data_arg = f.args.into_iter().nth(1).unwrap();
12565 match algo.as_str() {
12566 "SHA1" => {
12567 let name = match target {
12568 DialectType::Spark | DialectType::Databricks => "SHA",
12569 DialectType::Hive => "SHA1",
12570 _ => "SHA1",
12571 };
12572 Ok(Expression::Function(Box::new(Function::new(
12573 name.to_string(),
12574 vec![data_arg],
12575 ))))
12576 }
12577 "SHA2_256" => {
12578 Ok(Expression::Function(Box::new(Function::new(
12579 "SHA2".to_string(),
12580 vec![data_arg, Expression::number(256)],
12581 ))))
12582 }
12583 "SHA2_512" => {
12584 Ok(Expression::Function(Box::new(Function::new(
12585 "SHA2".to_string(),
12586 vec![data_arg, Expression::number(512)],
12587 ))))
12588 }
12589 "MD5" => Ok(Expression::Function(Box::new(Function::new(
12590 "MD5".to_string(),
12591 vec![data_arg],
12592 )))),
12593 _ => Ok(Expression::Function(Box::new(Function::new(
12594 "HASHBYTES".to_string(),
12595 vec![Expression::string(&algo), data_arg],
12596 )))),
12597 }
12598 }
12599 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
12600 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
12601 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
12602 let mut args = f.args;
12603 let json_expr = args.remove(0);
12604 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
12605 let mut json_path = "$".to_string();
12606 for a in &args {
12607 match a {
12608 Expression::Literal(lit)
12609 if matches!(
12610 lit.as_ref(),
12611 crate::expressions::Literal::String(_)
12612 ) =>
12613 {
12614 let crate::expressions::Literal::String(s) =
12615 lit.as_ref()
12616 else {
12617 unreachable!()
12618 };
12619 // Numeric string keys become array indices: [0]
12620 if s.chars().all(|c| c.is_ascii_digit()) {
12621 json_path.push('[');
12622 json_path.push_str(s);
12623 json_path.push(']');
12624 } else {
12625 json_path.push('.');
12626 json_path.push_str(s);
12627 }
12628 }
12629 _ => {
12630 json_path.push_str(".?");
12631 }
12632 }
12633 }
12634 match target {
12635 DialectType::Spark
12636 | DialectType::Databricks
12637 | DialectType::Hive => {
12638 Ok(Expression::Function(Box::new(Function::new(
12639 "GET_JSON_OBJECT".to_string(),
12640 vec![json_expr, Expression::string(&json_path)],
12641 ))))
12642 }
12643 DialectType::Presto | DialectType::Trino => {
12644 let func_name = if is_text {
12645 "JSON_EXTRACT_SCALAR"
12646 } else {
12647 "JSON_EXTRACT"
12648 };
12649 Ok(Expression::Function(Box::new(Function::new(
12650 func_name.to_string(),
12651 vec![json_expr, Expression::string(&json_path)],
12652 ))))
12653 }
12654 DialectType::BigQuery | DialectType::MySQL => {
12655 let func_name = if is_text {
12656 "JSON_EXTRACT_SCALAR"
12657 } else {
12658 "JSON_EXTRACT"
12659 };
12660 Ok(Expression::Function(Box::new(Function::new(
12661 func_name.to_string(),
12662 vec![json_expr, Expression::string(&json_path)],
12663 ))))
12664 }
12665 DialectType::PostgreSQL | DialectType::Materialize => {
12666 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
12667 let func_name = if is_text {
12668 "JSON_EXTRACT_PATH_TEXT"
12669 } else {
12670 "JSON_EXTRACT_PATH"
12671 };
12672 let mut new_args = vec![json_expr];
12673 new_args.extend(args);
12674 Ok(Expression::Function(Box::new(Function::new(
12675 func_name.to_string(),
12676 new_args,
12677 ))))
12678 }
12679 DialectType::DuckDB | DialectType::SQLite => {
12680 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
12681 if is_text {
12682 Ok(Expression::JsonExtractScalar(Box::new(
12683 crate::expressions::JsonExtractFunc {
12684 this: json_expr,
12685 path: Expression::string(&json_path),
12686 returning: None,
12687 arrow_syntax: true,
12688 hash_arrow_syntax: false,
12689 wrapper_option: None,
12690 quotes_option: None,
12691 on_scalar_string: false,
12692 on_error: None,
12693 },
12694 )))
12695 } else {
12696 Ok(Expression::JsonExtract(Box::new(
12697 crate::expressions::JsonExtractFunc {
12698 this: json_expr,
12699 path: Expression::string(&json_path),
12700 returning: None,
12701 arrow_syntax: true,
12702 hash_arrow_syntax: false,
12703 wrapper_option: None,
12704 quotes_option: None,
12705 on_scalar_string: false,
12706 on_error: None,
12707 },
12708 )))
12709 }
12710 }
12711 DialectType::Redshift => {
12712 // Redshift: always emit JSON_EXTRACT_PATH_TEXT (also for JSON_EXTRACT_PATH input) with the original key arguments
12713 let mut new_args = vec![json_expr];
12714 new_args.extend(args);
12715 Ok(Expression::Function(Box::new(Function::new(
12716 "JSON_EXTRACT_PATH_TEXT".to_string(),
12717 new_args,
12718 ))))
12719 }
12720 DialectType::TSQL => {
12721 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
12722 let jq = Expression::Function(Box::new(Function::new(
12723 "JSON_QUERY".to_string(),
12724 vec![json_expr.clone(), Expression::string(&json_path)],
12725 )));
12726 let jv = Expression::Function(Box::new(Function::new(
12727 "JSON_VALUE".to_string(),
12728 vec![json_expr, Expression::string(&json_path)],
12729 )));
12730 Ok(Expression::Function(Box::new(Function::new(
12731 "ISNULL".to_string(),
12732 vec![jq, jv],
12733 ))))
12734 }
12735 DialectType::ClickHouse => {
12736 let func_name = if is_text {
12737 "JSONExtractString"
12738 } else {
12739 "JSONExtractRaw"
12740 };
12741 let mut new_args = vec![json_expr];
12742 new_args.extend(args);
12743 Ok(Expression::Function(Box::new(Function::new(
12744 func_name.to_string(),
12745 new_args,
12746 ))))
12747 }
12748 _ => {
12749 let func_name = if is_text {
12750 "JSON_EXTRACT_SCALAR"
12751 } else {
12752 "JSON_EXTRACT"
12753 };
12754 Ok(Expression::Function(Box::new(Function::new(
12755 func_name.to_string(),
12756 vec![json_expr, Expression::string(&json_path)],
12757 ))))
12758 }
12759 }
12760 }
12761 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
12762 "APPROX_DISTINCT" if f.args.len() >= 1 => {
12763 let name = match target {
12764 DialectType::Spark
12765 | DialectType::Databricks
12766 | DialectType::Hive
12767 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
12768 _ => "APPROX_DISTINCT",
12769 };
12770 let mut args = f.args;
12771 // Hive doesn't support the accuracy parameter
12772 if name == "APPROX_COUNT_DISTINCT"
12773 && matches!(target, DialectType::Hive)
12774 {
12775 args.truncate(1);
12776 }
12777 Ok(Expression::Function(Box::new(Function::new(
12778 name.to_string(),
12779 args,
12780 ))))
12781 }
12782 // REGEXP_EXTRACT(x, pattern) - normalize default group index
12783 "REGEXP_EXTRACT" if f.args.len() == 2 => {
12784 // Determine source default group index
12785 let source_default = match source {
12786 DialectType::Presto
12787 | DialectType::Trino
12788 | DialectType::DuckDB => 0,
12789 _ => 1, // Hive/Spark/Databricks default = 1
12790 };
12791 // Determine target default group index
12792 let target_default = match target {
12793 DialectType::Presto
12794 | DialectType::Trino
12795 | DialectType::DuckDB
12796 | DialectType::BigQuery => 0,
12797 DialectType::Snowflake => {
12798 // Snowflake uses REGEXP_SUBSTR
12799 return Ok(Expression::Function(Box::new(Function::new(
12800 "REGEXP_SUBSTR".to_string(),
12801 f.args,
12802 ))));
12803 }
12804 _ => 1, // Hive/Spark/Databricks default = 1
12805 };
12806 if source_default != target_default {
12807 let mut args = f.args;
12808 args.push(Expression::number(source_default));
12809 Ok(Expression::Function(Box::new(Function::new(
12810 "REGEXP_EXTRACT".to_string(),
12811 args,
12812 ))))
12813 } else {
12814 Ok(Expression::Function(Box::new(Function::new(
12815 "REGEXP_EXTRACT".to_string(),
12816 f.args,
12817 ))))
12818 }
12819 }
12820 // RLIKE(str, pattern) -> REGEXP_MATCHES(str, pattern) for DuckDB, otherwise a RegexpLike expression (generated in the target-specific form)
12821 "RLIKE" if f.args.len() == 2 => {
12822 let mut args = f.args;
12823 let str_expr = args.remove(0);
12824 let pattern = args.remove(0);
12825 match target {
12826 DialectType::DuckDB => {
12827 // REGEXP_MATCHES(str, pattern)
12828 Ok(Expression::Function(Box::new(Function::new(
12829 "REGEXP_MATCHES".to_string(),
12830 vec![str_expr, pattern],
12831 ))))
12832 }
12833 _ => {
12834 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
12835 Ok(Expression::RegexpLike(Box::new(
12836 crate::expressions::RegexpFunc {
12837 this: str_expr,
12838 pattern,
12839 flags: None,
12840 },
12841 )))
12842 }
12843 }
12844 }
12845 // EOMONTH(date[, month_offset]) -> target-specific
12846 "EOMONTH" if f.args.len() >= 1 => {
12847 let mut args = f.args;
12848 let date_arg = args.remove(0);
12849 let month_offset = if !args.is_empty() {
12850 Some(args.remove(0))
12851 } else {
12852 None
12853 };
12854
12855 // Helper: wrap date in CAST to DATE
12856 let cast_to_date = |e: Expression| -> Expression {
12857 Expression::Cast(Box::new(Cast {
12858 this: e,
12859 to: DataType::Date,
12860 trailing_comments: vec![],
12861 double_colon_syntax: false,
12862 format: None,
12863 default: None,
12864 inferred_type: None,
12865 }))
12866 };
12867
12868 match target {
12869 DialectType::TSQL | DialectType::Fabric => {
12870 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
12871 let date = cast_to_date(date_arg);
12872 let date = if let Some(offset) = month_offset {
12873 Expression::Function(Box::new(Function::new(
12874 "DATEADD".to_string(),
12875 vec![
12876 Expression::Identifier(Identifier::new(
12877 "MONTH",
12878 )),
12879 offset,
12880 date,
12881 ],
12882 )))
12883 } else {
12884 date
12885 };
12886 Ok(Expression::Function(Box::new(Function::new(
12887 "EOMONTH".to_string(),
12888 vec![date],
12889 ))))
12890 }
12891 DialectType::Presto
12892 | DialectType::Trino
12893 | DialectType::Athena => {
12894 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
12895 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
12896 let cast_ts = Expression::Cast(Box::new(Cast {
12897 this: date_arg,
12898 to: DataType::Timestamp {
12899 timezone: false,
12900 precision: None,
12901 },
12902 trailing_comments: vec![],
12903 double_colon_syntax: false,
12904 format: None,
12905 default: None,
12906 inferred_type: None,
12907 }));
12908 let date = cast_to_date(cast_ts);
12909 let date = if let Some(offset) = month_offset {
12910 Expression::Function(Box::new(Function::new(
12911 "DATE_ADD".to_string(),
12912 vec![Expression::string("MONTH"), offset, date],
12913 )))
12914 } else {
12915 date
12916 };
12917 Ok(Expression::Function(Box::new(Function::new(
12918 "LAST_DAY_OF_MONTH".to_string(),
12919 vec![date],
12920 ))))
12921 }
12922 DialectType::PostgreSQL => {
12923 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
12924 let date = cast_to_date(date_arg);
12925 let date = if let Some(offset) = month_offset {
12926 let interval_str = format!(
12927 "{} MONTH",
12928 Self::expr_to_string_static(&offset)
12929 );
12930 Expression::Add(Box::new(
12931 crate::expressions::BinaryOp::new(
12932 date,
12933 Expression::Interval(Box::new(
12934 crate::expressions::Interval {
12935 this: Some(Expression::string(
12936 &interval_str,
12937 )),
12938 unit: None,
12939 },
12940 )),
12941 ),
12942 ))
12943 } else {
12944 date
12945 };
12946 let truncated =
12947 Expression::Function(Box::new(Function::new(
12948 "DATE_TRUNC".to_string(),
12949 vec![Expression::string("MONTH"), date],
12950 )));
12951 let plus_month = Expression::Add(Box::new(
12952 crate::expressions::BinaryOp::new(
12953 truncated,
12954 Expression::Interval(Box::new(
12955 crate::expressions::Interval {
12956 this: Some(Expression::string("1 MONTH")),
12957 unit: None,
12958 },
12959 )),
12960 ),
12961 ));
12962 let minus_day = Expression::Sub(Box::new(
12963 crate::expressions::BinaryOp::new(
12964 plus_month,
12965 Expression::Interval(Box::new(
12966 crate::expressions::Interval {
12967 this: Some(Expression::string("1 DAY")),
12968 unit: None,
12969 },
12970 )),
12971 ),
12972 ));
12973 Ok(Expression::Cast(Box::new(Cast {
12974 this: minus_day,
12975 to: DataType::Date,
12976 trailing_comments: vec![],
12977 double_colon_syntax: false,
12978 format: None,
12979 default: None,
12980 inferred_type: None,
12981 })))
12982 }
12983 DialectType::DuckDB => {
12984 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
12985 let date = cast_to_date(date_arg);
12986 let date = if let Some(offset) = month_offset {
12987 // Wrap negative numbers in parentheses for DuckDB INTERVAL
12988 let interval_val =
12989 if matches!(&offset, Expression::Neg(_)) {
12990 Expression::Paren(Box::new(
12991 crate::expressions::Paren {
12992 this: offset,
12993 trailing_comments: Vec::new(),
12994 },
12995 ))
12996 } else {
12997 offset
12998 };
12999 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
13000 date,
13001 Expression::Interval(Box::new(crate::expressions::Interval {
13002 this: Some(interval_val),
13003 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13004 unit: crate::expressions::IntervalUnit::Month,
13005 use_plural: false,
13006 }),
13007 })),
13008 )))
13009 } else {
13010 date
13011 };
13012 Ok(Expression::Function(Box::new(Function::new(
13013 "LAST_DAY".to_string(),
13014 vec![date],
13015 ))))
13016 }
13017 DialectType::Snowflake | DialectType::Redshift => {
13018 // Snowflake: LAST_DAY(TO_DATE(date)); Redshift: LAST_DAY(CAST(date AS DATE))
13019 // With offset: LAST_DAY(DATEADD(MONTH, offset, <date expression above>))
13020 let date = if matches!(target, DialectType::Snowflake) {
13021 Expression::Function(Box::new(Function::new(
13022 "TO_DATE".to_string(),
13023 vec![date_arg],
13024 )))
13025 } else {
13026 cast_to_date(date_arg)
13027 };
13028 let date = if let Some(offset) = month_offset {
13029 Expression::Function(Box::new(Function::new(
13030 "DATEADD".to_string(),
13031 vec![
13032 Expression::Identifier(Identifier::new(
13033 "MONTH",
13034 )),
13035 offset,
13036 date,
13037 ],
13038 )))
13039 } else {
13040 date
13041 };
13042 Ok(Expression::Function(Box::new(Function::new(
13043 "LAST_DAY".to_string(),
13044 vec![date],
13045 ))))
13046 }
13047 DialectType::Spark | DialectType::Databricks => {
13048 // Spark: LAST_DAY(TO_DATE(date))
13049 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
13050 let date = Expression::Function(Box::new(Function::new(
13051 "TO_DATE".to_string(),
13052 vec![date_arg],
13053 )));
13054 let date = if let Some(offset) = month_offset {
13055 Expression::Function(Box::new(Function::new(
13056 "ADD_MONTHS".to_string(),
13057 vec![date, offset],
13058 )))
13059 } else {
13060 date
13061 };
13062 Ok(Expression::Function(Box::new(Function::new(
13063 "LAST_DAY".to_string(),
13064 vec![date],
13065 ))))
13066 }
13067 DialectType::MySQL => {
13068 // MySQL: LAST_DAY(DATE(date)) - no offset
13069 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
13070 let date = if let Some(offset) = month_offset {
13071 let iu = crate::expressions::IntervalUnit::Month;
13072 Expression::DateAdd(Box::new(
13073 crate::expressions::DateAddFunc {
13074 this: date_arg,
13075 interval: offset,
13076 unit: iu,
13077 },
13078 ))
13079 } else {
13080 Expression::Function(Box::new(Function::new(
13081 "DATE".to_string(),
13082 vec![date_arg],
13083 )))
13084 };
13085 Ok(Expression::Function(Box::new(Function::new(
13086 "LAST_DAY".to_string(),
13087 vec![date],
13088 ))))
13089 }
13090 DialectType::BigQuery => {
13091 // BigQuery: LAST_DAY(CAST(date AS DATE))
13092 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
13093 let date = cast_to_date(date_arg);
13094 let date = if let Some(offset) = month_offset {
13095 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
13096 this: Some(offset),
13097 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13098 unit: crate::expressions::IntervalUnit::Month,
13099 use_plural: false,
13100 }),
13101 }));
13102 Expression::Function(Box::new(Function::new(
13103 "DATE_ADD".to_string(),
13104 vec![date, interval],
13105 )))
13106 } else {
13107 date
13108 };
13109 Ok(Expression::Function(Box::new(Function::new(
13110 "LAST_DAY".to_string(),
13111 vec![date],
13112 ))))
13113 }
13114 DialectType::ClickHouse => {
13115 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE))); with offset: LAST_DAY(DATE_ADD(MONTH, offset, CAST(date AS Nullable(DATE))))
13116 let date = Expression::Cast(Box::new(Cast {
13117 this: date_arg,
13118 to: DataType::Nullable {
13119 inner: Box::new(DataType::Date),
13120 },
13121 trailing_comments: vec![],
13122 double_colon_syntax: false,
13123 format: None,
13124 default: None,
13125 inferred_type: None,
13126 }));
13127 let date = if let Some(offset) = month_offset {
13128 Expression::Function(Box::new(Function::new(
13129 "DATE_ADD".to_string(),
13130 vec![
13131 Expression::Identifier(Identifier::new(
13132 "MONTH",
13133 )),
13134 offset,
13135 date,
13136 ],
13137 )))
13138 } else {
13139 date
13140 };
13141 Ok(Expression::Function(Box::new(Function::new(
13142 "LAST_DAY".to_string(),
13143 vec![date],
13144 ))))
13145 }
13146 DialectType::Hive => {
13147 // Hive: LAST_DAY(date); with offset: LAST_DAY(ADD_MONTHS(date, offset))
13148 let date = if let Some(offset) = month_offset {
13149 Expression::Function(Box::new(Function::new(
13150 "ADD_MONTHS".to_string(),
13151 vec![date_arg, offset],
13152 )))
13153 } else {
13154 date_arg
13155 };
13156 Ok(Expression::Function(Box::new(Function::new(
13157 "LAST_DAY".to_string(),
13158 vec![date],
13159 ))))
13160 }
13161 _ => {
13162 // Default: LAST_DAY(date); with offset: LAST_DAY(DATEADD(MONTH, offset, date))
13163 let date = if let Some(offset) = month_offset {
13164 let unit =
13165 Expression::Identifier(Identifier::new("MONTH"));
13166 Expression::Function(Box::new(Function::new(
13167 "DATEADD".to_string(),
13168 vec![unit, offset, date_arg],
13169 )))
13170 } else {
13171 date_arg
13172 };
13173 Ok(Expression::Function(Box::new(Function::new(
13174 "LAST_DAY".to_string(),
13175 vec![date],
13176 ))))
13177 }
13178 }
13179 }
13180 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
13181 "LAST_DAY" | "LAST_DAY_OF_MONTH"
13182 if !matches!(source, DialectType::BigQuery)
13183 && f.args.len() >= 1 =>
13184 {
13185 let first_arg = f.args.into_iter().next().unwrap();
13186 match target {
13187 DialectType::TSQL | DialectType::Fabric => {
13188 Ok(Expression::Function(Box::new(Function::new(
13189 "EOMONTH".to_string(),
13190 vec![first_arg],
13191 ))))
13192 }
13193 DialectType::Presto
13194 | DialectType::Trino
13195 | DialectType::Athena => {
13196 Ok(Expression::Function(Box::new(Function::new(
13197 "LAST_DAY_OF_MONTH".to_string(),
13198 vec![first_arg],
13199 ))))
13200 }
13201 _ => Ok(Expression::Function(Box::new(Function::new(
13202 "LAST_DAY".to_string(),
13203 vec![first_arg],
13204 )))),
13205 }
13206 }
13207 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
13208 "MAP"
13209 if f.args.len() == 2
13210 && matches!(
13211 source,
13212 DialectType::Presto
13213 | DialectType::Trino
13214 | DialectType::Athena
13215 ) =>
13216 {
13217 let keys_arg = f.args[0].clone();
13218 let vals_arg = f.args[1].clone();
13219
13220 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
13221 fn extract_array_elements(
13222 expr: &Expression,
13223 ) -> Option<&Vec<Expression>> {
13224 match expr {
13225 Expression::Array(arr) => Some(&arr.expressions),
13226 Expression::ArrayFunc(arr) => Some(&arr.expressions),
13227 Expression::Function(f)
13228 if f.name.eq_ignore_ascii_case("ARRAY") =>
13229 {
13230 Some(&f.args)
13231 }
13232 _ => None,
13233 }
13234 }
13235
13236 match target {
13237 DialectType::Spark | DialectType::Databricks => {
13238 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
13239 Ok(Expression::Function(Box::new(Function::new(
13240 "MAP_FROM_ARRAYS".to_string(),
13241 f.args,
13242 ))))
13243 }
13244 DialectType::Hive => {
13245 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
13246 if let (Some(keys), Some(vals)) = (
13247 extract_array_elements(&keys_arg),
13248 extract_array_elements(&vals_arg),
13249 ) {
13250 if keys.len() == vals.len() {
13251 let mut interleaved = Vec::new();
13252 for (k, v) in keys.iter().zip(vals.iter()) {
13253 interleaved.push(k.clone());
13254 interleaved.push(v.clone());
13255 }
13256 Ok(Expression::Function(Box::new(Function::new(
13257 "MAP".to_string(),
13258 interleaved,
13259 ))))
13260 } else {
13261 Ok(Expression::Function(Box::new(Function::new(
13262 "MAP".to_string(),
13263 f.args,
13264 ))))
13265 }
13266 } else {
13267 Ok(Expression::Function(Box::new(Function::new(
13268 "MAP".to_string(),
13269 f.args,
13270 ))))
13271 }
13272 }
13273 DialectType::Snowflake => {
13274 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
13275 if let (Some(keys), Some(vals)) = (
13276 extract_array_elements(&keys_arg),
13277 extract_array_elements(&vals_arg),
13278 ) {
13279 if keys.len() == vals.len() {
13280 let mut interleaved = Vec::new();
13281 for (k, v) in keys.iter().zip(vals.iter()) {
13282 interleaved.push(k.clone());
13283 interleaved.push(v.clone());
13284 }
13285 Ok(Expression::Function(Box::new(Function::new(
13286 "OBJECT_CONSTRUCT".to_string(),
13287 interleaved,
13288 ))))
13289 } else {
13290 Ok(Expression::Function(Box::new(Function::new(
13291 "MAP".to_string(),
13292 f.args,
13293 ))))
13294 }
13295 } else {
13296 Ok(Expression::Function(Box::new(Function::new(
13297 "MAP".to_string(),
13298 f.args,
13299 ))))
13300 }
13301 }
13302 _ => Ok(Expression::Function(f)),
13303 }
13304 }
13305 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
13306 "MAP"
13307 if f.args.is_empty()
13308 && matches!(
13309 source,
13310 DialectType::Hive
13311 | DialectType::Spark
13312 | DialectType::Databricks
13313 )
13314 && matches!(
13315 target,
13316 DialectType::Presto
13317 | DialectType::Trino
13318 | DialectType::Athena
13319 ) =>
13320 {
13321 let empty_keys =
13322 Expression::Array(Box::new(crate::expressions::Array {
13323 expressions: vec![],
13324 }));
13325 let empty_vals =
13326 Expression::Array(Box::new(crate::expressions::Array {
13327 expressions: vec![],
13328 }));
13329 Ok(Expression::Function(Box::new(Function::new(
13330 "MAP".to_string(),
13331 vec![empty_keys, empty_vals],
13332 ))))
13333 }
13334 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
13335 "MAP"
13336 if f.args.len() >= 2
13337 && f.args.len() % 2 == 0
13338 && matches!(
13339 source,
13340 DialectType::Hive
13341 | DialectType::Spark
13342 | DialectType::Databricks
13343 | DialectType::ClickHouse
13344 ) =>
13345 {
13346 let args = f.args;
13347 match target {
13348 DialectType::DuckDB => {
13349 // MAP([k1, k2], [v1, v2])
13350 let mut keys = Vec::new();
13351 let mut vals = Vec::new();
13352 for (i, arg) in args.into_iter().enumerate() {
13353 if i % 2 == 0 {
13354 keys.push(arg);
13355 } else {
13356 vals.push(arg);
13357 }
13358 }
13359 let keys_arr = Expression::Array(Box::new(
13360 crate::expressions::Array { expressions: keys },
13361 ));
13362 let vals_arr = Expression::Array(Box::new(
13363 crate::expressions::Array { expressions: vals },
13364 ));
13365 Ok(Expression::Function(Box::new(Function::new(
13366 "MAP".to_string(),
13367 vec![keys_arr, vals_arr],
13368 ))))
13369 }
13370 DialectType::Presto | DialectType::Trino => {
13371 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
13372 let mut keys = Vec::new();
13373 let mut vals = Vec::new();
13374 for (i, arg) in args.into_iter().enumerate() {
13375 if i % 2 == 0 {
13376 keys.push(arg);
13377 } else {
13378 vals.push(arg);
13379 }
13380 }
13381 let keys_arr = Expression::Array(Box::new(
13382 crate::expressions::Array { expressions: keys },
13383 ));
13384 let vals_arr = Expression::Array(Box::new(
13385 crate::expressions::Array { expressions: vals },
13386 ));
13387 Ok(Expression::Function(Box::new(Function::new(
13388 "MAP".to_string(),
13389 vec![keys_arr, vals_arr],
13390 ))))
13391 }
13392 DialectType::Snowflake => Ok(Expression::Function(Box::new(
13393 Function::new("OBJECT_CONSTRUCT".to_string(), args),
13394 ))),
13395 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
13396 Function::new("map".to_string(), args),
13397 ))),
13398 _ => Ok(Expression::Function(Box::new(Function::new(
13399 "MAP".to_string(),
13400 args,
13401 )))),
13402 }
13403 }
13404 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
13405 "COLLECT_LIST" if f.args.len() >= 1 => {
13406 let name = match target {
13407 DialectType::Spark
13408 | DialectType::Databricks
13409 | DialectType::Hive => "COLLECT_LIST",
13410 DialectType::DuckDB
13411 | DialectType::PostgreSQL
13412 | DialectType::Redshift
13413 | DialectType::Snowflake
13414 | DialectType::BigQuery => "ARRAY_AGG",
13415 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
13416 _ => "ARRAY_AGG",
13417 };
13418 Ok(Expression::Function(Box::new(Function::new(
13419 name.to_string(),
13420 f.args,
13421 ))))
13422 }
13423 // COLLECT_SET(x) -> target-specific distinct array aggregation
13424 "COLLECT_SET" if f.args.len() >= 1 => {
13425 let name = match target {
13426 DialectType::Spark
13427 | DialectType::Databricks
13428 | DialectType::Hive => "COLLECT_SET",
13429 DialectType::Presto
13430 | DialectType::Trino
13431 | DialectType::Athena => "SET_AGG",
13432 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
13433 _ => "ARRAY_AGG",
13434 };
13435 Ok(Expression::Function(Box::new(Function::new(
13436 name.to_string(),
13437 f.args,
13438 ))))
13439 }
13440 // ISNAN(x) / IS_NAN(x) - normalize
13441 "ISNAN" | "IS_NAN" => {
13442 let name = match target {
13443 DialectType::Spark
13444 | DialectType::Databricks
13445 | DialectType::Hive => "ISNAN",
13446 DialectType::Presto
13447 | DialectType::Trino
13448 | DialectType::Athena => "IS_NAN",
13449 DialectType::BigQuery
13450 | DialectType::PostgreSQL
13451 | DialectType::Redshift => "IS_NAN",
13452 DialectType::ClickHouse => "IS_NAN",
13453 _ => "ISNAN",
13454 };
13455 Ok(Expression::Function(Box::new(Function::new(
13456 name.to_string(),
13457 f.args,
13458 ))))
13459 }
13460 // SPLIT_PART(str, delim, index) -> target-specific
13461 "SPLIT_PART" if f.args.len() == 3 => {
13462 match target {
13463 DialectType::Spark | DialectType::Databricks => {
13464 // Keep as SPLIT_PART (Spark 3.4+)
13465 Ok(Expression::Function(Box::new(Function::new(
13466 "SPLIT_PART".to_string(),
13467 f.args,
13468 ))))
13469 }
13470 DialectType::DuckDB
13471 if matches!(source, DialectType::Snowflake) =>
13472 {
13473 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
13474 // - part_index 0 treated as 1
13475 // - empty delimiter: return whole string if index 1 or -1, else ''
13476 let mut args = f.args;
13477 let str_arg = args.remove(0);
13478 let delim_arg = args.remove(0);
13479 let idx_arg = args.remove(0);
13480
13481 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
13482 let adjusted_idx = Expression::Paren(Box::new(Paren {
13483 this: Expression::Case(Box::new(Case {
13484 operand: None,
13485 whens: vec![(
13486 Expression::Eq(Box::new(BinaryOp {
13487 left: idx_arg.clone(),
13488 right: Expression::number(0),
13489 left_comments: vec![],
13490 operator_comments: vec![],
13491 trailing_comments: vec![],
13492 inferred_type: None,
13493 })),
13494 Expression::number(1),
13495 )],
13496 else_: Some(idx_arg.clone()),
13497 comments: vec![],
13498 inferred_type: None,
13499 })),
13500 trailing_comments: vec![],
13501 }));
13502
13503 // SPLIT_PART(str, delim, adjusted_idx)
13504 let base_func =
13505 Expression::Function(Box::new(Function::new(
13506 "SPLIT_PART".to_string(),
13507 vec![
13508 str_arg.clone(),
13509 delim_arg.clone(),
13510 adjusted_idx.clone(),
13511 ],
13512 )));
13513
13514 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
13515 let empty_delim_case = Expression::Paren(Box::new(Paren {
13516 this: Expression::Case(Box::new(Case {
13517 operand: None,
13518 whens: vec![(
13519 Expression::Or(Box::new(BinaryOp {
13520 left: Expression::Eq(Box::new(BinaryOp {
13521 left: adjusted_idx.clone(),
13522 right: Expression::number(1),
13523 left_comments: vec![],
13524 operator_comments: vec![],
13525 trailing_comments: vec![],
13526 inferred_type: None,
13527 })),
13528 right: Expression::Eq(Box::new(BinaryOp {
13529 left: adjusted_idx,
13530 right: Expression::number(-1),
13531 left_comments: vec![],
13532 operator_comments: vec![],
13533 trailing_comments: vec![],
13534 inferred_type: None,
13535 })),
13536 left_comments: vec![],
13537 operator_comments: vec![],
13538 trailing_comments: vec![],
13539 inferred_type: None,
13540 })),
13541 str_arg,
13542 )],
13543 else_: Some(Expression::string("")),
13544 comments: vec![],
13545 inferred_type: None,
13546 })),
13547 trailing_comments: vec![],
13548 }));
13549
13550 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
13551 Ok(Expression::Case(Box::new(Case {
13552 operand: None,
13553 whens: vec![(
13554 Expression::Eq(Box::new(BinaryOp {
13555 left: delim_arg,
13556 right: Expression::string(""),
13557 left_comments: vec![],
13558 operator_comments: vec![],
13559 trailing_comments: vec![],
13560 inferred_type: None,
13561 })),
13562 empty_delim_case,
13563 )],
13564 else_: Some(base_func),
13565 comments: vec![],
13566 inferred_type: None,
13567 })))
13568 }
13569 DialectType::DuckDB
13570 | DialectType::PostgreSQL
13571 | DialectType::Snowflake
13572 | DialectType::Redshift
13573 | DialectType::Trino
13574 | DialectType::Presto => Ok(Expression::Function(Box::new(
13575 Function::new("SPLIT_PART".to_string(), f.args),
13576 ))),
13577 DialectType::Hive => {
13578 // SPLIT(str, delim)[index]
13579 // Complex conversion, just keep as-is for now
13580 Ok(Expression::Function(Box::new(Function::new(
13581 "SPLIT_PART".to_string(),
13582 f.args,
13583 ))))
13584 }
13585 _ => Ok(Expression::Function(Box::new(Function::new(
13586 "SPLIT_PART".to_string(),
13587 f.args,
13588 )))),
13589 }
13590 }
13591 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
13592 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
13593 let is_scalar = name == "JSON_EXTRACT_SCALAR";
13594 match target {
13595 DialectType::Spark
13596 | DialectType::Databricks
13597 | DialectType::Hive => {
13598 let mut args = f.args;
13599 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
13600 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
13601 if let Some(Expression::Function(inner)) = args.first() {
13602 if inner.name.eq_ignore_ascii_case("TRY")
13603 && inner.args.len() == 1
13604 {
13605 let mut inner_args = inner.args.clone();
13606 args[0] = inner_args.remove(0);
13607 }
13608 }
13609 Ok(Expression::Function(Box::new(Function::new(
13610 "GET_JSON_OBJECT".to_string(),
13611 args,
13612 ))))
13613 }
13614 DialectType::DuckDB | DialectType::SQLite => {
13615 // json -> path syntax
13616 let mut args = f.args;
13617 let json_expr = args.remove(0);
13618 let path = args.remove(0);
13619 Ok(Expression::JsonExtract(Box::new(
13620 crate::expressions::JsonExtractFunc {
13621 this: json_expr,
13622 path,
13623 returning: None,
13624 arrow_syntax: true,
13625 hash_arrow_syntax: false,
13626 wrapper_option: None,
13627 quotes_option: None,
13628 on_scalar_string: false,
13629 on_error: None,
13630 },
13631 )))
13632 }
13633 DialectType::TSQL => {
13634 let func_name = if is_scalar {
13635 "JSON_VALUE"
13636 } else {
13637 "JSON_QUERY"
13638 };
13639 Ok(Expression::Function(Box::new(Function::new(
13640 func_name.to_string(),
13641 f.args,
13642 ))))
13643 }
13644 DialectType::PostgreSQL | DialectType::Redshift => {
13645 let func_name = if is_scalar {
13646 "JSON_EXTRACT_PATH_TEXT"
13647 } else {
13648 "JSON_EXTRACT_PATH"
13649 };
13650 Ok(Expression::Function(Box::new(Function::new(
13651 func_name.to_string(),
13652 f.args,
13653 ))))
13654 }
13655 _ => Ok(Expression::Function(Box::new(Function::new(
13656 name.to_string(),
13657 f.args,
13658 )))),
13659 }
13660 }
13661 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
13662 "JSON_SEARCH"
13663 if matches!(target, DialectType::DuckDB)
13664 && (3..=5).contains(&f.args.len()) =>
13665 {
13666 let args = &f.args;
13667
13668 // Only rewrite deterministic modes and NULL/no escape-char variant.
13669 let mode = match &args[1] {
13670 Expression::Literal(lit)
13671 if matches!(
13672 lit.as_ref(),
13673 crate::expressions::Literal::String(_)
13674 ) =>
13675 {
13676 let crate::expressions::Literal::String(s) = lit.as_ref()
13677 else {
13678 unreachable!()
13679 };
13680 s.to_ascii_lowercase()
13681 }
13682 _ => return Ok(Expression::Function(f)),
13683 };
13684 if mode != "one" && mode != "all" {
13685 return Ok(Expression::Function(f));
13686 }
13687 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
13688 return Ok(Expression::Function(f));
13689 }
13690
13691 let json_doc_sql = match Generator::sql(&args[0]) {
13692 Ok(sql) => sql,
13693 Err(_) => return Ok(Expression::Function(f)),
13694 };
13695 let search_sql = match Generator::sql(&args[2]) {
13696 Ok(sql) => sql,
13697 Err(_) => return Ok(Expression::Function(f)),
13698 };
13699 let path_sql = if args.len() == 5 {
13700 match Generator::sql(&args[4]) {
13701 Ok(sql) => sql,
13702 Err(_) => return Ok(Expression::Function(f)),
13703 }
13704 } else {
13705 "'$'".to_string()
13706 };
13707
13708 let rewrite_sql = if mode == "all" {
13709 format!(
13710 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
13711 json_doc_sql, path_sql, search_sql
13712 )
13713 } else {
13714 format!(
13715 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
13716 json_doc_sql, path_sql, search_sql
13717 )
13718 };
13719
13720 Ok(Expression::Raw(crate::expressions::Raw {
13721 sql: rewrite_sql,
13722 }))
13723 }
13724 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
13725 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
13726 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
13727 if f.args.len() >= 2
13728 && matches!(source, DialectType::SingleStore) =>
13729 {
13730 let is_bson = name == "BSON_EXTRACT_BSON";
13731 let mut args = f.args;
13732 let json_expr = args.remove(0);
13733
13734 // Build JSONPath from remaining arguments
13735 let mut path = String::from("$");
13736 for arg in &args {
13737 if let Expression::Literal(lit) = arg {
13738 if let crate::expressions::Literal::String(s) = lit.as_ref()
13739 {
13740 // Check if it's a numeric string (array index)
13741 if s.parse::<i64>().is_ok() {
13742 path.push('[');
13743 path.push_str(s);
13744 path.push(']');
13745 } else {
13746 path.push('.');
13747 path.push_str(s);
13748 }
13749 }
13750 }
13751 }
13752
13753 let target_func = if is_bson {
13754 "JSONB_EXTRACT"
13755 } else {
13756 "JSON_EXTRACT"
13757 };
13758 Ok(Expression::Function(Box::new(Function::new(
13759 target_func.to_string(),
13760 vec![json_expr, Expression::string(&path)],
13761 ))))
13762 }
13763 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
13764 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
13765 Ok(Expression::Function(Box::new(Function {
13766 name: "arraySum".to_string(),
13767 args: f.args,
13768 distinct: f.distinct,
13769 trailing_comments: f.trailing_comments,
13770 use_bracket_syntax: f.use_bracket_syntax,
13771 no_parens: f.no_parens,
13772 quoted: f.quoted,
13773 span: None,
13774 inferred_type: None,
13775 })))
13776 }
13777 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
13778 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
13779 // and is handled by JsonQueryValueConvert action. This handles the case where
13780 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
13781 "JSON_QUERY" | "JSON_VALUE"
13782 if f.args.len() == 2
13783 && matches!(
13784 source,
13785 DialectType::TSQL | DialectType::Fabric
13786 ) =>
13787 {
13788 match target {
13789 DialectType::Spark
13790 | DialectType::Databricks
13791 | DialectType::Hive => Ok(Expression::Function(Box::new(
13792 Function::new("GET_JSON_OBJECT".to_string(), f.args),
13793 ))),
13794 _ => Ok(Expression::Function(Box::new(Function::new(
13795 name.to_string(),
13796 f.args,
13797 )))),
13798 }
13799 }
13800 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
13801 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
13802 let arg = f.args.into_iter().next().unwrap();
13803 let is_hive_source = matches!(
13804 source,
13805 DialectType::Hive
13806 | DialectType::Spark
13807 | DialectType::Databricks
13808 );
13809 match target {
13810 DialectType::DuckDB if is_hive_source => {
13811 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
13812 let strptime =
13813 Expression::Function(Box::new(Function::new(
13814 "STRPTIME".to_string(),
13815 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
13816 )));
13817 Ok(Expression::Function(Box::new(Function::new(
13818 "EPOCH".to_string(),
13819 vec![strptime],
13820 ))))
13821 }
13822 DialectType::Presto | DialectType::Trino if is_hive_source => {
13823 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
13824 let cast_varchar =
13825 Expression::Cast(Box::new(crate::expressions::Cast {
13826 this: arg.clone(),
13827 to: DataType::VarChar {
13828 length: None,
13829 parenthesized_length: false,
13830 },
13831 trailing_comments: vec![],
13832 double_colon_syntax: false,
13833 format: None,
13834 default: None,
13835 inferred_type: None,
13836 }));
13837 let date_parse =
13838 Expression::Function(Box::new(Function::new(
13839 "DATE_PARSE".to_string(),
13840 vec![
13841 cast_varchar,
13842 Expression::string("%Y-%m-%d %T"),
13843 ],
13844 )));
13845 let try_expr = Expression::Function(Box::new(
13846 Function::new("TRY".to_string(), vec![date_parse]),
13847 ));
13848 let date_format =
13849 Expression::Function(Box::new(Function::new(
13850 "DATE_FORMAT".to_string(),
13851 vec![arg, Expression::string("%Y-%m-%d %T")],
13852 )));
13853 let parse_datetime =
13854 Expression::Function(Box::new(Function::new(
13855 "PARSE_DATETIME".to_string(),
13856 vec![
13857 date_format,
13858 Expression::string("yyyy-MM-dd HH:mm:ss"),
13859 ],
13860 )));
13861 let coalesce =
13862 Expression::Function(Box::new(Function::new(
13863 "COALESCE".to_string(),
13864 vec![try_expr, parse_datetime],
13865 )));
13866 Ok(Expression::Function(Box::new(Function::new(
13867 "TO_UNIXTIME".to_string(),
13868 vec![coalesce],
13869 ))))
13870 }
13871 DialectType::Presto | DialectType::Trino => {
13872 Ok(Expression::Function(Box::new(Function::new(
13873 "TO_UNIXTIME".to_string(),
13874 vec![arg],
13875 ))))
13876 }
13877 _ => Ok(Expression::Function(Box::new(Function::new(
13878 "UNIX_TIMESTAMP".to_string(),
13879 vec![arg],
13880 )))),
13881 }
13882 }
13883 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
13884 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
13885 DialectType::Spark
13886 | DialectType::Databricks
13887 | DialectType::Hive => Ok(Expression::Function(Box::new(
13888 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
13889 ))),
13890 _ => Ok(Expression::Function(Box::new(Function::new(
13891 "TO_UNIX_TIMESTAMP".to_string(),
13892 f.args,
13893 )))),
13894 },
13895 // CURDATE() -> CURRENT_DATE
13896 "CURDATE" => {
13897 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
13898 }
13899 // CURTIME() -> CURRENT_TIME
13900 "CURTIME" => {
13901 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
13902 precision: None,
13903 }))
13904 }
13905 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
13906 "ARRAY_SORT" if f.args.len() >= 1 => {
13907 match target {
13908 DialectType::Hive => {
13909 let mut args = f.args;
13910 args.truncate(1); // Drop lambda comparator
13911 Ok(Expression::Function(Box::new(Function::new(
13912 "SORT_ARRAY".to_string(),
13913 args,
13914 ))))
13915 }
13916 DialectType::DuckDB
13917 if matches!(source, DialectType::Snowflake) =>
13918 {
13919 // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
13920 let mut args_iter = f.args.into_iter();
13921 let arr = args_iter.next().unwrap();
13922 let asc_arg = args_iter.next();
13923 let nulls_first_arg = args_iter.next();
13924
13925 let is_asc_bool = asc_arg
13926 .as_ref()
13927 .map(|a| matches!(a, Expression::Boolean(_)))
13928 .unwrap_or(false);
13929 let is_nf_bool = nulls_first_arg
13930 .as_ref()
13931 .map(|a| matches!(a, Expression::Boolean(_)))
13932 .unwrap_or(false);
13933
13934 // No boolean args: pass through as-is
13935 if !is_asc_bool && !is_nf_bool {
13936 let mut result_args = vec![arr];
13937 if let Some(asc) = asc_arg {
13938 result_args.push(asc);
13939 if let Some(nf) = nulls_first_arg {
13940 result_args.push(nf);
13941 }
13942 }
13943 Ok(Expression::Function(Box::new(Function::new(
13944 "LIST_SORT".to_string(),
13945 result_args,
13946 ))))
13947 } else {
13948 // Has boolean args: convert to DuckDB LIST_SORT format
13949 let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);
13950
13951 // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
13952 let nulls_are_first = match &nulls_first_arg {
13953 Some(Expression::Boolean(b)) => b.value,
13954 None if is_asc_bool => descending, // Snowflake default
13955 _ => false,
13956 };
13957 let nulls_first_sql = if nulls_are_first {
13958 Some(Expression::string("NULLS FIRST"))
13959 } else {
13960 None
13961 };
13962
13963 if !is_asc_bool {
13964 // asc is non-boolean expression, nulls_first is boolean
13965 let mut result_args = vec![arr];
13966 if let Some(asc) = asc_arg {
13967 result_args.push(asc);
13968 }
13969 if let Some(nf) = nulls_first_sql {
13970 result_args.push(nf);
13971 }
13972 Ok(Expression::Function(Box::new(Function::new(
13973 "LIST_SORT".to_string(),
13974 result_args,
13975 ))))
13976 } else {
13977 if !descending && !nulls_are_first {
13978 // ASC, NULLS LAST (default) -> LIST_SORT(arr)
13979 Ok(Expression::Function(Box::new(
13980 Function::new(
13981 "LIST_SORT".to_string(),
13982 vec![arr],
13983 ),
13984 )))
13985 } else if descending && !nulls_are_first {
13986 // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
13987 Ok(Expression::Function(Box::new(
13988 Function::new(
13989 "ARRAY_REVERSE_SORT".to_string(),
13990 vec![arr],
13991 ),
13992 )))
13993 } else {
13994 // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
13995 let order_str =
13996 if descending { "DESC" } else { "ASC" };
13997 Ok(Expression::Function(Box::new(
13998 Function::new(
13999 "LIST_SORT".to_string(),
14000 vec![
14001 arr,
14002 Expression::string(order_str),
14003 Expression::string("NULLS FIRST"),
14004 ],
14005 ),
14006 )))
14007 }
14008 }
14009 }
14010 }
14011 DialectType::DuckDB => {
14012 // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
14013 let mut args = f.args;
14014 args.truncate(1); // Drop lambda comparator for DuckDB
14015 Ok(Expression::Function(Box::new(Function::new(
14016 "ARRAY_SORT".to_string(),
14017 args,
14018 ))))
14019 }
14020 _ => Ok(Expression::Function(f)),
14021 }
14022 }
14023 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
14024 "SORT_ARRAY" if f.args.len() == 1 => match target {
14025 DialectType::Hive
14026 | DialectType::Spark
14027 | DialectType::Databricks => Ok(Expression::Function(f)),
14028 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14029 Function::new("LIST_SORT".to_string(), f.args),
14030 ))),
14031 _ => Ok(Expression::Function(Box::new(Function::new(
14032 "ARRAY_SORT".to_string(),
14033 f.args,
14034 )))),
14035 },
14036 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
14037 "SORT_ARRAY" if f.args.len() == 2 => {
14038 let is_desc =
14039 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
14040 if is_desc {
14041 match target {
14042 DialectType::DuckDB => {
14043 Ok(Expression::Function(Box::new(Function::new(
14044 "ARRAY_REVERSE_SORT".to_string(),
14045 vec![f.args.into_iter().next().unwrap()],
14046 ))))
14047 }
14048 DialectType::Presto | DialectType::Trino => {
14049 let arr_arg = f.args.into_iter().next().unwrap();
14050 let a = Expression::Column(Box::new(
14051 crate::expressions::Column {
14052 name: crate::expressions::Identifier::new("a"),
14053 table: None,
14054 join_mark: false,
14055 trailing_comments: Vec::new(),
14056 span: None,
14057 inferred_type: None,
14058 },
14059 ));
14060 let b = Expression::Column(Box::new(
14061 crate::expressions::Column {
14062 name: crate::expressions::Identifier::new("b"),
14063 table: None,
14064 join_mark: false,
14065 trailing_comments: Vec::new(),
14066 span: None,
14067 inferred_type: None,
14068 },
14069 ));
14070 let case_expr = Expression::Case(Box::new(
14071 crate::expressions::Case {
14072 operand: None,
14073 whens: vec![
14074 (
14075 Expression::Lt(Box::new(
14076 BinaryOp::new(a.clone(), b.clone()),
14077 )),
14078 Expression::Literal(Box::new(
14079 Literal::Number("1".to_string()),
14080 )),
14081 ),
14082 (
14083 Expression::Gt(Box::new(
14084 BinaryOp::new(a.clone(), b.clone()),
14085 )),
14086 Expression::Literal(Box::new(
14087 Literal::Number("-1".to_string()),
14088 )),
14089 ),
14090 ],
14091 else_: Some(Expression::Literal(Box::new(
14092 Literal::Number("0".to_string()),
14093 ))),
14094 comments: Vec::new(),
14095 inferred_type: None,
14096 },
14097 ));
14098 let lambda = Expression::Lambda(Box::new(
14099 crate::expressions::LambdaExpr {
14100 parameters: vec![
14101 crate::expressions::Identifier::new("a"),
14102 crate::expressions::Identifier::new("b"),
14103 ],
14104 body: case_expr,
14105 colon: false,
14106 parameter_types: Vec::new(),
14107 },
14108 ));
14109 Ok(Expression::Function(Box::new(Function::new(
14110 "ARRAY_SORT".to_string(),
14111 vec![arr_arg, lambda],
14112 ))))
14113 }
14114 _ => Ok(Expression::Function(f)),
14115 }
14116 } else {
14117 // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
14118 match target {
14119 DialectType::Hive => Ok(Expression::Function(f)),
14120 DialectType::DuckDB => {
14121 Ok(Expression::Function(Box::new(Function::new(
14122 "LIST_SORT".to_string(),
14123 vec![f.args.into_iter().next().unwrap()],
14124 ))))
14125 }
14126 _ => Ok(Expression::Function(Box::new(Function::new(
14127 "ARRAY_SORT".to_string(),
14128 vec![f.args.into_iter().next().unwrap()],
14129 )))),
14130 }
14131 }
14132 }
14133 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
14134 "LEFT" if f.args.len() == 2 => {
14135 match target {
14136 DialectType::Hive
14137 | DialectType::Presto
14138 | DialectType::Trino
14139 | DialectType::Athena => {
14140 let x = f.args[0].clone();
14141 let n = f.args[1].clone();
14142 Ok(Expression::Function(Box::new(Function::new(
14143 "SUBSTRING".to_string(),
14144 vec![x, Expression::number(1), n],
14145 ))))
14146 }
14147 DialectType::Spark | DialectType::Databricks
14148 if matches!(
14149 source,
14150 DialectType::TSQL | DialectType::Fabric
14151 ) =>
14152 {
14153 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
14154 let x = f.args[0].clone();
14155 let n = f.args[1].clone();
14156 let cast_x = Expression::Cast(Box::new(Cast {
14157 this: x,
14158 to: DataType::VarChar {
14159 length: None,
14160 parenthesized_length: false,
14161 },
14162 double_colon_syntax: false,
14163 trailing_comments: Vec::new(),
14164 format: None,
14165 default: None,
14166 inferred_type: None,
14167 }));
14168 Ok(Expression::Function(Box::new(Function::new(
14169 "LEFT".to_string(),
14170 vec![cast_x, n],
14171 ))))
14172 }
14173 _ => Ok(Expression::Function(f)),
14174 }
14175 }
14176 "RIGHT" if f.args.len() == 2 => {
14177 match target {
14178 DialectType::Hive
14179 | DialectType::Presto
14180 | DialectType::Trino
14181 | DialectType::Athena => {
14182 let x = f.args[0].clone();
14183 let n = f.args[1].clone();
14184 // SUBSTRING(x, LENGTH(x) - (n - 1))
14185 let len_x = Expression::Function(Box::new(Function::new(
14186 "LENGTH".to_string(),
14187 vec![x.clone()],
14188 )));
14189 let n_minus_1 = Expression::Sub(Box::new(
14190 crate::expressions::BinaryOp::new(
14191 n,
14192 Expression::number(1),
14193 ),
14194 ));
14195 let n_minus_1_paren = Expression::Paren(Box::new(
14196 crate::expressions::Paren {
14197 this: n_minus_1,
14198 trailing_comments: Vec::new(),
14199 },
14200 ));
14201 let offset = Expression::Sub(Box::new(
14202 crate::expressions::BinaryOp::new(
14203 len_x,
14204 n_minus_1_paren,
14205 ),
14206 ));
14207 Ok(Expression::Function(Box::new(Function::new(
14208 "SUBSTRING".to_string(),
14209 vec![x, offset],
14210 ))))
14211 }
14212 DialectType::Spark | DialectType::Databricks
14213 if matches!(
14214 source,
14215 DialectType::TSQL | DialectType::Fabric
14216 ) =>
14217 {
14218 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
14219 let x = f.args[0].clone();
14220 let n = f.args[1].clone();
14221 let cast_x = Expression::Cast(Box::new(Cast {
14222 this: x,
14223 to: DataType::VarChar {
14224 length: None,
14225 parenthesized_length: false,
14226 },
14227 double_colon_syntax: false,
14228 trailing_comments: Vec::new(),
14229 format: None,
14230 default: None,
14231 inferred_type: None,
14232 }));
14233 Ok(Expression::Function(Box::new(Function::new(
14234 "RIGHT".to_string(),
14235 vec![cast_x, n],
14236 ))))
14237 }
14238 _ => Ok(Expression::Function(f)),
14239 }
14240 }
14241 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
14242 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
14243 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14244 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
14245 ))),
14246 DialectType::Spark | DialectType::Databricks => {
14247 Ok(Expression::Function(Box::new(Function::new(
14248 "MAP_FROM_ARRAYS".to_string(),
14249 f.args,
14250 ))))
14251 }
14252 _ => Ok(Expression::Function(Box::new(Function::new(
14253 "MAP".to_string(),
14254 f.args,
14255 )))),
14256 },
14257 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
14258 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
14259 "LIKE" if f.args.len() >= 2 => {
14260 let (this, pattern) = if matches!(source, DialectType::SQLite) {
14261 // SQLite: LIKE(pattern, string) -> string LIKE pattern
14262 (f.args[1].clone(), f.args[0].clone())
14263 } else {
14264 // Standard: LIKE(string, pattern) -> string LIKE pattern
14265 (f.args[0].clone(), f.args[1].clone())
14266 };
14267 let escape = if f.args.len() >= 3 {
14268 Some(f.args[2].clone())
14269 } else {
14270 None
14271 };
14272 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
14273 left: this,
14274 right: pattern,
14275 escape,
14276 quantifier: None,
14277 inferred_type: None,
14278 })))
14279 }
14280 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
14281 "ILIKE" if f.args.len() >= 2 => {
14282 let this = f.args[0].clone();
14283 let pattern = f.args[1].clone();
14284 let escape = if f.args.len() >= 3 {
14285 Some(f.args[2].clone())
14286 } else {
14287 None
14288 };
14289 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
14290 left: this,
14291 right: pattern,
14292 escape,
14293 quantifier: None,
14294 inferred_type: None,
14295 })))
14296 }
14297 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
14298 "CHAR" if f.args.len() == 1 => match target {
14299 DialectType::MySQL
14300 | DialectType::SingleStore
14301 | DialectType::TSQL => Ok(Expression::Function(f)),
14302 _ => Ok(Expression::Function(Box::new(Function::new(
14303 "CHR".to_string(),
14304 f.args,
14305 )))),
14306 },
14307 // CONCAT(a, b) -> a || b for PostgreSQL
14308 "CONCAT"
14309 if f.args.len() == 2
14310 && matches!(target, DialectType::PostgreSQL)
14311 && matches!(
14312 source,
14313 DialectType::ClickHouse | DialectType::MySQL
14314 ) =>
14315 {
14316 let mut args = f.args;
14317 let right = args.pop().unwrap();
14318 let left = args.pop().unwrap();
14319 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
14320 this: Box::new(left),
14321 expression: Box::new(right),
14322 safe: None,
14323 })))
14324 }
14325 // ARRAY_TO_STRING(arr, delim) -> target-specific
14326 "ARRAY_TO_STRING"
14327 if f.args.len() == 2
14328 && matches!(target, DialectType::DuckDB)
14329 && matches!(source, DialectType::Snowflake) =>
14330 {
14331 let mut args = f.args;
14332 let arr = args.remove(0);
14333 let sep = args.remove(0);
14334 // sep IS NULL
14335 let sep_is_null = Expression::IsNull(Box::new(IsNull {
14336 this: sep.clone(),
14337 not: false,
14338 postfix_form: false,
14339 }));
14340 // COALESCE(CAST(x AS TEXT), '')
14341 let cast_x = Expression::Cast(Box::new(Cast {
14342 this: Expression::Identifier(Identifier::new("x")),
14343 to: DataType::Text,
14344 trailing_comments: Vec::new(),
14345 double_colon_syntax: false,
14346 format: None,
14347 default: None,
14348 inferred_type: None,
14349 }));
14350 let coalesce = Expression::Coalesce(Box::new(
14351 crate::expressions::VarArgFunc {
14352 original_name: None,
14353 expressions: vec![
14354 cast_x,
14355 Expression::Literal(Box::new(Literal::String(
14356 String::new(),
14357 ))),
14358 ],
14359 inferred_type: None,
14360 },
14361 ));
14362 let lambda =
14363 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
14364 parameters: vec![Identifier::new("x")],
14365 body: coalesce,
14366 colon: false,
14367 parameter_types: Vec::new(),
14368 }));
14369 let list_transform = Expression::Function(Box::new(Function::new(
14370 "LIST_TRANSFORM".to_string(),
14371 vec![arr, lambda],
14372 )));
14373 let array_to_string =
14374 Expression::Function(Box::new(Function::new(
14375 "ARRAY_TO_STRING".to_string(),
14376 vec![list_transform, sep],
14377 )));
14378 Ok(Expression::Case(Box::new(Case {
14379 operand: None,
14380 whens: vec![(sep_is_null, Expression::Null(Null))],
14381 else_: Some(array_to_string),
14382 comments: Vec::new(),
14383 inferred_type: None,
14384 })))
14385 }
14386 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
14387 DialectType::Presto | DialectType::Trino => {
14388 Ok(Expression::Function(Box::new(Function::new(
14389 "ARRAY_JOIN".to_string(),
14390 f.args,
14391 ))))
14392 }
14393 DialectType::TSQL => Ok(Expression::Function(Box::new(
14394 Function::new("STRING_AGG".to_string(), f.args),
14395 ))),
14396 _ => Ok(Expression::Function(f)),
14397 },
14398 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
14399 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
14400 DialectType::Spark
14401 | DialectType::Databricks
14402 | DialectType::Hive => Ok(Expression::Function(Box::new(
14403 Function::new("CONCAT".to_string(), f.args),
14404 ))),
14405 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14406 Function::new("ARRAY_CAT".to_string(), f.args),
14407 ))),
14408 DialectType::Redshift => Ok(Expression::Function(Box::new(
14409 Function::new("ARRAY_CONCAT".to_string(), f.args),
14410 ))),
14411 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14412 Function::new("ARRAY_CAT".to_string(), f.args),
14413 ))),
14414 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14415 Function::new("LIST_CONCAT".to_string(), f.args),
14416 ))),
14417 DialectType::Presto | DialectType::Trino => {
14418 Ok(Expression::Function(Box::new(Function::new(
14419 "CONCAT".to_string(),
14420 f.args,
14421 ))))
14422 }
14423 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14424 Function::new("ARRAY_CONCAT".to_string(), f.args),
14425 ))),
14426 _ => Ok(Expression::Function(f)),
14427 },
14428 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
14429 "HAS" if f.args.len() == 2 => match target {
14430 DialectType::Spark
14431 | DialectType::Databricks
14432 | DialectType::Hive => Ok(Expression::Function(Box::new(
14433 Function::new("ARRAY_CONTAINS".to_string(), f.args),
14434 ))),
14435 DialectType::Presto | DialectType::Trino => {
14436 Ok(Expression::Function(Box::new(Function::new(
14437 "CONTAINS".to_string(),
14438 f.args,
14439 ))))
14440 }
14441 _ => Ok(Expression::Function(f)),
14442 },
14443 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
14444 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
14445 Function::new("COALESCE".to_string(), f.args),
14446 ))),
14447 // ISNULL(x) in MySQL -> (x IS NULL)
14448 "ISNULL"
14449 if f.args.len() == 1
14450 && matches!(source, DialectType::MySQL)
14451 && matches!(target, DialectType::MySQL) =>
14452 {
14453 let arg = f.args.into_iter().next().unwrap();
14454 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
14455 this: Expression::IsNull(Box::new(
14456 crate::expressions::IsNull {
14457 this: arg,
14458 not: false,
14459 postfix_form: false,
14460 },
14461 )),
14462 trailing_comments: Vec::new(),
14463 })))
14464 }
14465 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
14466 "MONTHNAME"
14467 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
14468 {
14469 let arg = f.args.into_iter().next().unwrap();
14470 Ok(Expression::Function(Box::new(Function::new(
14471 "DATE_FORMAT".to_string(),
14472 vec![arg, Expression::string("%M")],
14473 ))))
14474 }
14475 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
14476 "SPLITBYSTRING" if f.args.len() == 2 => {
14477 let sep = f.args[0].clone();
14478 let str_arg = f.args[1].clone();
14479 match target {
14480 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14481 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
14482 ))),
14483 DialectType::Doris => {
14484 Ok(Expression::Function(Box::new(Function::new(
14485 "SPLIT_BY_STRING".to_string(),
14486 vec![str_arg, sep],
14487 ))))
14488 }
14489 DialectType::Hive
14490 | DialectType::Spark
14491 | DialectType::Databricks => {
14492 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
14493 let escaped =
14494 Expression::Function(Box::new(Function::new(
14495 "CONCAT".to_string(),
14496 vec![
14497 Expression::string("\\Q"),
14498 sep,
14499 Expression::string("\\E"),
14500 ],
14501 )));
14502 Ok(Expression::Function(Box::new(Function::new(
14503 "SPLIT".to_string(),
14504 vec![str_arg, escaped],
14505 ))))
14506 }
14507 _ => Ok(Expression::Function(f)),
14508 }
14509 }
14510 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
14511 "SPLITBYREGEXP" if f.args.len() == 2 => {
14512 let sep = f.args[0].clone();
14513 let str_arg = f.args[1].clone();
14514 match target {
14515 DialectType::DuckDB => {
14516 Ok(Expression::Function(Box::new(Function::new(
14517 "STR_SPLIT_REGEX".to_string(),
14518 vec![str_arg, sep],
14519 ))))
14520 }
14521 DialectType::Hive
14522 | DialectType::Spark
14523 | DialectType::Databricks => {
14524 Ok(Expression::Function(Box::new(Function::new(
14525 "SPLIT".to_string(),
14526 vec![str_arg, sep],
14527 ))))
14528 }
14529 _ => Ok(Expression::Function(f)),
14530 }
14531 }
14532 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
14533 "TOMONDAY" => {
14534 if f.args.len() == 1 {
14535 let arg = f.args.into_iter().next().unwrap();
14536 match target {
14537 DialectType::Doris => {
14538 Ok(Expression::Function(Box::new(Function::new(
14539 "DATE_TRUNC".to_string(),
14540 vec![arg, Expression::string("WEEK")],
14541 ))))
14542 }
14543 _ => Ok(Expression::Function(Box::new(Function::new(
14544 "DATE_TRUNC".to_string(),
14545 vec![Expression::string("WEEK"), arg],
14546 )))),
14547 }
14548 } else {
14549 Ok(Expression::Function(f))
14550 }
14551 }
14552 // COLLECT_LIST(x) -> ARRAY_AGG(x) for targets other than Spark/Databricks/Hive (no FILTER clause is added here)
14553 "COLLECT_LIST" if f.args.len() == 1 => match target {
14554 DialectType::Spark
14555 | DialectType::Databricks
14556 | DialectType::Hive => Ok(Expression::Function(f)),
14557 _ => Ok(Expression::Function(Box::new(Function::new(
14558 "ARRAY_AGG".to_string(),
14559 f.args,
14560 )))),
14561 },
14562 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
14563 "TO_CHAR"
14564 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
14565 {
14566 let arg = f.args.into_iter().next().unwrap();
14567 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14568 this: arg,
14569 to: DataType::Custom {
14570 name: "STRING".to_string(),
14571 },
14572 double_colon_syntax: false,
14573 trailing_comments: Vec::new(),
14574 format: None,
14575 default: None,
14576 inferred_type: None,
14577 })))
14578 }
14579 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
14580 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
14581 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14582 Function::new("RANDOM".to_string(), vec![]),
14583 ))),
14584 _ => Ok(Expression::Function(f)),
14585 },
14586 // ClickHouse formatDateTime -> target-specific
14587 "FORMATDATETIME" if f.args.len() >= 2 => match target {
14588 DialectType::MySQL => Ok(Expression::Function(Box::new(
14589 Function::new("DATE_FORMAT".to_string(), f.args),
14590 ))),
14591 _ => Ok(Expression::Function(f)),
14592 },
14593 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
14594 "REPLICATE" if f.args.len() == 2 => match target {
14595 DialectType::TSQL => Ok(Expression::Function(f)),
14596 _ => Ok(Expression::Function(Box::new(Function::new(
14597 "REPEAT".to_string(),
14598 f.args,
14599 )))),
14600 },
14601 // LEN(x) -> LENGTH(x) for non-TSQL targets
14602 // No CAST needed when arg is already a string literal
14603 "LEN" if f.args.len() == 1 => {
14604 match target {
14605 DialectType::TSQL => Ok(Expression::Function(f)),
14606 DialectType::Spark | DialectType::Databricks => {
14607 let arg = f.args.into_iter().next().unwrap();
14608 // Don't wrap string literals with CAST - they're already strings
14609 let is_string = matches!(
14610 &arg,
14611 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
14612 );
14613 let final_arg = if is_string {
14614 arg
14615 } else {
14616 Expression::Cast(Box::new(Cast {
14617 this: arg,
14618 to: DataType::VarChar {
14619 length: None,
14620 parenthesized_length: false,
14621 },
14622 double_colon_syntax: false,
14623 trailing_comments: Vec::new(),
14624 format: None,
14625 default: None,
14626 inferred_type: None,
14627 }))
14628 };
14629 Ok(Expression::Function(Box::new(Function::new(
14630 "LENGTH".to_string(),
14631 vec![final_arg],
14632 ))))
14633 }
14634 _ => {
14635 let arg = f.args.into_iter().next().unwrap();
14636 Ok(Expression::Function(Box::new(Function::new(
14637 "LENGTH".to_string(),
14638 vec![arg],
14639 ))))
14640 }
14641 }
14642 }
14643 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
14644 "COUNT_BIG" if f.args.len() == 1 => match target {
14645 DialectType::TSQL => Ok(Expression::Function(f)),
14646 _ => Ok(Expression::Function(Box::new(Function::new(
14647 "COUNT".to_string(),
14648 f.args,
14649 )))),
14650 },
14651 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
14652 "DATEFROMPARTS" if f.args.len() == 3 => match target {
14653 DialectType::TSQL => Ok(Expression::Function(f)),
14654 _ => Ok(Expression::Function(Box::new(Function::new(
14655 "MAKE_DATE".to_string(),
14656 f.args,
14657 )))),
14658 },
14659 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
14660 "REGEXP_LIKE" if f.args.len() >= 2 => {
14661 let str_expr = f.args[0].clone();
14662 let pattern = f.args[1].clone();
14663 let flags = if f.args.len() >= 3 {
14664 Some(f.args[2].clone())
14665 } else {
14666 None
14667 };
14668 match target {
14669 DialectType::DuckDB => {
14670 let mut new_args = vec![str_expr, pattern];
14671 if let Some(fl) = flags {
14672 new_args.push(fl);
14673 }
14674 Ok(Expression::Function(Box::new(Function::new(
14675 "REGEXP_MATCHES".to_string(),
14676 new_args,
14677 ))))
14678 }
14679 _ => Ok(Expression::RegexpLike(Box::new(
14680 crate::expressions::RegexpFunc {
14681 this: str_expr,
14682 pattern,
14683 flags,
14684 },
14685 ))),
14686 }
14687 }
14688 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
14689 "ARRAYJOIN" if f.args.len() == 1 => match target {
14690 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14691 Function::new("UNNEST".to_string(), f.args),
14692 ))),
14693 _ => Ok(Expression::Function(f)),
14694 },
14695 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
14696 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
14697 match target {
14698 DialectType::TSQL => Ok(Expression::Function(f)),
14699 DialectType::DuckDB => {
14700 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
14701 let mut args = f.args;
14702 let ms = args.pop().unwrap();
14703 let s = args.pop().unwrap();
14704 // s + (ms / 1000.0)
14705 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
14706 ms,
14707 Expression::Literal(Box::new(
14708 crate::expressions::Literal::Number(
14709 "1000.0".to_string(),
14710 ),
14711 )),
14712 )));
14713 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
14714 s,
14715 Expression::Paren(Box::new(Paren {
14716 this: ms_frac,
14717 trailing_comments: vec![],
14718 })),
14719 )));
14720 args.push(s_with_ms);
14721 Ok(Expression::Function(Box::new(Function::new(
14722 "MAKE_TIMESTAMP".to_string(),
14723 args,
14724 ))))
14725 }
14726 DialectType::Snowflake => {
14727 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
14728 let mut args = f.args;
14729 let ms = args.pop().unwrap();
14730 // ms * 1000000
14731 let ns = Expression::Mul(Box::new(BinaryOp::new(
14732 ms,
14733 Expression::number(1000000),
14734 )));
14735 args.push(ns);
14736 Ok(Expression::Function(Box::new(Function::new(
14737 "TIMESTAMP_FROM_PARTS".to_string(),
14738 args,
14739 ))))
14740 }
14741 _ => {
14742 // Default: keep function name for other targets
14743 Ok(Expression::Function(Box::new(Function::new(
14744 "DATETIMEFROMPARTS".to_string(),
14745 f.args,
14746 ))))
14747 }
14748 }
14749 }
14750 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
14751 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
14752 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
14753 let is_try = name == "TRY_CONVERT";
14754 let type_expr = f.args[0].clone();
14755 let value_expr = f.args[1].clone();
14756 let style = if f.args.len() >= 3 {
14757 Some(&f.args[2])
14758 } else {
14759 None
14760 };
14761
14762 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
14763 if matches!(target, DialectType::TSQL) {
14764 let normalized_type = match &type_expr {
14765 Expression::DataType(dt) => {
14766 let new_dt = match dt {
14767 DataType::Int { .. } => DataType::Custom {
14768 name: "INTEGER".to_string(),
14769 },
14770 _ => dt.clone(),
14771 };
14772 Expression::DataType(new_dt)
14773 }
14774 Expression::Identifier(id) => {
14775 if id.name.eq_ignore_ascii_case("INT") {
14776 Expression::Identifier(
14777 crate::expressions::Identifier::new("INTEGER"),
14778 )
14779 } else {
14780 let upper = id.name.to_ascii_uppercase();
14781 Expression::Identifier(
14782 crate::expressions::Identifier::new(upper),
14783 )
14784 }
14785 }
14786 Expression::Column(col) => {
14787 if col.name.name.eq_ignore_ascii_case("INT") {
14788 Expression::Identifier(
14789 crate::expressions::Identifier::new("INTEGER"),
14790 )
14791 } else {
14792 let upper = col.name.name.to_ascii_uppercase();
14793 Expression::Identifier(
14794 crate::expressions::Identifier::new(upper),
14795 )
14796 }
14797 }
14798 _ => type_expr.clone(),
14799 };
14800 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
14801 let mut new_args = vec![normalized_type, value_expr];
14802 if let Some(s) = style {
14803 new_args.push(s.clone());
14804 }
14805 return Ok(Expression::Function(Box::new(Function::new(
14806 func_name.to_string(),
14807 new_args,
14808 ))));
14809 }
14810
14811 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
14812 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
14813 match e {
14814 Expression::DataType(dt) => {
14815 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
14816 match dt {
14817 DataType::Custom { name }
14818 if name.starts_with("NVARCHAR(")
14819 || name.starts_with("NCHAR(") =>
14820 {
14821 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
14822 let inner = &name[name.find('(').unwrap() + 1
14823 ..name.len() - 1];
14824 if inner.eq_ignore_ascii_case("MAX") {
14825 Some(DataType::Text)
14826 } else if let Ok(len) = inner.parse::<u32>() {
14827 if name.starts_with("NCHAR") {
14828 Some(DataType::Char {
14829 length: Some(len),
14830 })
14831 } else {
14832 Some(DataType::VarChar {
14833 length: Some(len),
14834 parenthesized_length: false,
14835 })
14836 }
14837 } else {
14838 Some(dt.clone())
14839 }
14840 }
14841 DataType::Custom { name } if name == "NVARCHAR" => {
14842 Some(DataType::VarChar {
14843 length: None,
14844 parenthesized_length: false,
14845 })
14846 }
14847 DataType::Custom { name } if name == "NCHAR" => {
14848 Some(DataType::Char { length: None })
14849 }
14850 DataType::Custom { name }
14851 if name == "NVARCHAR(MAX)"
14852 || name == "VARCHAR(MAX)" =>
14853 {
14854 Some(DataType::Text)
14855 }
14856 _ => Some(dt.clone()),
14857 }
14858 }
14859 Expression::Identifier(id) => {
14860 let name = id.name.to_ascii_uppercase();
14861 match name.as_str() {
14862 "INT" | "INTEGER" => Some(DataType::Int {
14863 length: None,
14864 integer_spelling: false,
14865 }),
14866 "BIGINT" => Some(DataType::BigInt { length: None }),
14867 "SMALLINT" => {
14868 Some(DataType::SmallInt { length: None })
14869 }
14870 "TINYINT" => {
14871 Some(DataType::TinyInt { length: None })
14872 }
14873 "FLOAT" => Some(DataType::Float {
14874 precision: None,
14875 scale: None,
14876 real_spelling: false,
14877 }),
14878 "REAL" => Some(DataType::Float {
14879 precision: None,
14880 scale: None,
14881 real_spelling: true,
14882 }),
14883 "DATETIME" | "DATETIME2" => {
14884 Some(DataType::Timestamp {
14885 timezone: false,
14886 precision: None,
14887 })
14888 }
14889 "DATE" => Some(DataType::Date),
14890 "BIT" => Some(DataType::Boolean),
14891 "TEXT" => Some(DataType::Text),
14892 "NUMERIC" => Some(DataType::Decimal {
14893 precision: None,
14894 scale: None,
14895 }),
14896 "MONEY" => Some(DataType::Decimal {
14897 precision: Some(15),
14898 scale: Some(4),
14899 }),
14900 "SMALLMONEY" => Some(DataType::Decimal {
14901 precision: Some(6),
14902 scale: Some(4),
14903 }),
14904 "VARCHAR" => Some(DataType::VarChar {
14905 length: None,
14906 parenthesized_length: false,
14907 }),
14908 "NVARCHAR" => Some(DataType::VarChar {
14909 length: None,
14910 parenthesized_length: false,
14911 }),
14912 "CHAR" => Some(DataType::Char { length: None }),
14913 "NCHAR" => Some(DataType::Char { length: None }),
14914 _ => Some(DataType::Custom { name }),
14915 }
14916 }
14917 Expression::Column(col) => {
14918 let name = col.name.name.to_ascii_uppercase();
14919 match name.as_str() {
14920 "INT" | "INTEGER" => Some(DataType::Int {
14921 length: None,
14922 integer_spelling: false,
14923 }),
14924 "BIGINT" => Some(DataType::BigInt { length: None }),
14925 "FLOAT" => Some(DataType::Float {
14926 precision: None,
14927 scale: None,
14928 real_spelling: false,
14929 }),
14930 "DATETIME" | "DATETIME2" => {
14931 Some(DataType::Timestamp {
14932 timezone: false,
14933 precision: None,
14934 })
14935 }
14936 "DATE" => Some(DataType::Date),
14937 "NUMERIC" => Some(DataType::Decimal {
14938 precision: None,
14939 scale: None,
14940 }),
14941 "VARCHAR" => Some(DataType::VarChar {
14942 length: None,
14943 parenthesized_length: false,
14944 }),
14945 "NVARCHAR" => Some(DataType::VarChar {
14946 length: None,
14947 parenthesized_length: false,
14948 }),
14949 "CHAR" => Some(DataType::Char { length: None }),
14950 "NCHAR" => Some(DataType::Char { length: None }),
14951 _ => Some(DataType::Custom { name }),
14952 }
14953 }
14954 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
14955 Expression::Function(f) => {
14956 let fname = f.name.to_ascii_uppercase();
14957 match fname.as_str() {
14958 "VARCHAR" | "NVARCHAR" => {
14959 let len = f.args.first().and_then(|a| {
14960 if let Expression::Literal(lit) = a
14961 {
14962 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14963 n.parse::<u32>().ok()
14964 } else { None }
14965 } else if let Expression::Identifier(id) = a
14966 {
14967 if id.name.eq_ignore_ascii_case("MAX") {
14968 None
14969 } else {
14970 None
14971 }
14972 } else {
14973 None
14974 }
14975 });
14976 // Check for VARCHAR(MAX) -> TEXT
14977 let is_max = f.args.first().map_or(false, |a| {
14978 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
14979 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
14980 });
14981 if is_max {
14982 Some(DataType::Text)
14983 } else {
14984 Some(DataType::VarChar {
14985 length: len,
14986 parenthesized_length: false,
14987 })
14988 }
14989 }
14990 "NCHAR" | "CHAR" => {
14991 let len = f.args.first().and_then(|a| {
14992 if let Expression::Literal(lit) = a
14993 {
14994 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14995 n.parse::<u32>().ok()
14996 } else { None }
14997 } else {
14998 None
14999 }
15000 });
15001 Some(DataType::Char { length: len })
15002 }
15003 "NUMERIC" | "DECIMAL" => {
15004 let precision = f.args.first().and_then(|a| {
15005 if let Expression::Literal(lit) = a
15006 {
15007 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
15008 n.parse::<u32>().ok()
15009 } else { None }
15010 } else {
15011 None
15012 }
15013 });
15014 let scale = f.args.get(1).and_then(|a| {
15015 if let Expression::Literal(lit) = a
15016 {
15017 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
15018 n.parse::<u32>().ok()
15019 } else { None }
15020 } else {
15021 None
15022 }
15023 });
15024 Some(DataType::Decimal { precision, scale })
15025 }
15026 _ => None,
15027 }
15028 }
15029 _ => None,
15030 }
15031 }
15032
15033 if let Some(mut dt) = expr_to_datatype(&type_expr) {
15034 // For TSQL source: VARCHAR/CHAR without length defaults to 30
15035 let is_tsql_source =
15036 matches!(source, DialectType::TSQL | DialectType::Fabric);
15037 if is_tsql_source {
15038 match &dt {
15039 DataType::VarChar { length: None, .. } => {
15040 dt = DataType::VarChar {
15041 length: Some(30),
15042 parenthesized_length: false,
15043 };
15044 }
15045 DataType::Char { length: None } => {
15046 dt = DataType::Char { length: Some(30) };
15047 }
15048 _ => {}
15049 }
15050 }
15051
15052 // Determine if this is a string type
15053 let is_string_type = matches!(
15054 dt,
15055 DataType::VarChar { .. }
15056 | DataType::Char { .. }
15057 | DataType::Text
15058 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
15059 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
15060 || name.starts_with("VARCHAR(") || name == "VARCHAR"
15061 || name == "STRING");
15062
15063 // Determine if this is a date/time type
15064 let is_datetime_type = matches!(
15065 dt,
15066 DataType::Timestamp { .. } | DataType::Date
15067 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
15068 || name == "DATETIME2" || name == "SMALLDATETIME");
15069
15070 // Check for date conversion with style
15071 if style.is_some() {
15072 let style_num = style.and_then(|s| {
15073 if let Expression::Literal(lit) = s {
15074 if let crate::expressions::Literal::Number(n) =
15075 lit.as_ref()
15076 {
15077 n.parse::<u32>().ok()
15078 } else {
15079 None
15080 }
15081 } else {
15082 None
15083 }
15084 });
15085
15086 // TSQL CONVERT date styles (Java format)
15087 let format_str = style_num.and_then(|n| match n {
15088 101 => Some("MM/dd/yyyy"),
15089 102 => Some("yyyy.MM.dd"),
15090 103 => Some("dd/MM/yyyy"),
15091 104 => Some("dd.MM.yyyy"),
15092 105 => Some("dd-MM-yyyy"),
15093 108 => Some("HH:mm:ss"),
15094 110 => Some("MM-dd-yyyy"),
15095 112 => Some("yyyyMMdd"),
15096 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
15097 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
15098 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
15099 _ => None,
15100 });
15101
15102 // Non-string, non-datetime types with style: just CAST, ignore the style
15103 if !is_string_type && !is_datetime_type {
15104 let cast_expr = if is_try {
15105 Expression::TryCast(Box::new(
15106 crate::expressions::Cast {
15107 this: value_expr,
15108 to: dt,
15109 trailing_comments: Vec::new(),
15110 double_colon_syntax: false,
15111 format: None,
15112 default: None,
15113 inferred_type: None,
15114 },
15115 ))
15116 } else {
15117 Expression::Cast(Box::new(
15118 crate::expressions::Cast {
15119 this: value_expr,
15120 to: dt,
15121 trailing_comments: Vec::new(),
15122 double_colon_syntax: false,
15123 format: None,
15124 default: None,
15125 inferred_type: None,
15126 },
15127 ))
15128 };
15129 return Ok(cast_expr);
15130 }
15131
15132 if let Some(java_fmt) = format_str {
15133 let c_fmt = java_fmt
15134 .replace("yyyy", "%Y")
15135 .replace("MM", "%m")
15136 .replace("dd", "%d")
15137 .replace("HH", "%H")
15138 .replace("mm", "%M")
15139 .replace("ss", "%S")
15140 .replace("SSSSSS", "%f")
15141 .replace("SSS", "%f")
15142 .replace("'T'", "T");
15143
15144 // For datetime target types: style is the INPUT format for parsing strings -> dates
15145 if is_datetime_type {
15146 match target {
15147 DialectType::DuckDB => {
15148 return Ok(Expression::Function(Box::new(
15149 Function::new(
15150 "STRPTIME".to_string(),
15151 vec![
15152 value_expr,
15153 Expression::string(&c_fmt),
15154 ],
15155 ),
15156 )));
15157 }
15158 DialectType::Spark
15159 | DialectType::Databricks => {
15160 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
15161 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
15162 let func_name =
15163 if matches!(dt, DataType::Date) {
15164 "TO_DATE"
15165 } else {
15166 "TO_TIMESTAMP"
15167 };
15168 return Ok(Expression::Function(Box::new(
15169 Function::new(
15170 func_name.to_string(),
15171 vec![
15172 value_expr,
15173 Expression::string(java_fmt),
15174 ],
15175 ),
15176 )));
15177 }
15178 DialectType::Hive => {
15179 return Ok(Expression::Function(Box::new(
15180 Function::new(
15181 "TO_TIMESTAMP".to_string(),
15182 vec![
15183 value_expr,
15184 Expression::string(java_fmt),
15185 ],
15186 ),
15187 )));
15188 }
15189 _ => {
15190 return Ok(Expression::Cast(Box::new(
15191 crate::expressions::Cast {
15192 this: value_expr,
15193 to: dt,
15194 trailing_comments: Vec::new(),
15195 double_colon_syntax: false,
15196 format: None,
15197 default: None,
15198 inferred_type: None,
15199 },
15200 )));
15201 }
15202 }
15203 }
15204
15205 // For string target types: style is the OUTPUT format for dates -> strings
15206 match target {
15207 DialectType::DuckDB => Ok(Expression::Function(
15208 Box::new(Function::new(
15209 "STRPTIME".to_string(),
15210 vec![
15211 value_expr,
15212 Expression::string(&c_fmt),
15213 ],
15214 )),
15215 )),
15216 DialectType::Spark | DialectType::Databricks => {
15217 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
15218 // Determine the target string type
15219 let string_dt = match &dt {
15220 DataType::VarChar {
15221 length: Some(l),
15222 ..
15223 } => DataType::VarChar {
15224 length: Some(*l),
15225 parenthesized_length: false,
15226 },
15227 DataType::Text => DataType::Custom {
15228 name: "STRING".to_string(),
15229 },
15230 _ => DataType::Custom {
15231 name: "STRING".to_string(),
15232 },
15233 };
15234 let date_format_expr = Expression::Function(
15235 Box::new(Function::new(
15236 "DATE_FORMAT".to_string(),
15237 vec![
15238 value_expr,
15239 Expression::string(java_fmt),
15240 ],
15241 )),
15242 );
15243 let cast_expr = if is_try {
15244 Expression::TryCast(Box::new(
15245 crate::expressions::Cast {
15246 this: date_format_expr,
15247 to: string_dt,
15248 trailing_comments: Vec::new(),
15249 double_colon_syntax: false,
15250 format: None,
15251 default: None,
15252 inferred_type: None,
15253 },
15254 ))
15255 } else {
15256 Expression::Cast(Box::new(
15257 crate::expressions::Cast {
15258 this: date_format_expr,
15259 to: string_dt,
15260 trailing_comments: Vec::new(),
15261 double_colon_syntax: false,
15262 format: None,
15263 default: None,
15264 inferred_type: None,
15265 },
15266 ))
15267 };
15268 Ok(cast_expr)
15269 }
15270 DialectType::MySQL | DialectType::SingleStore => {
15271 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
15272 let mysql_fmt = java_fmt
15273 .replace("yyyy", "%Y")
15274 .replace("MM", "%m")
15275 .replace("dd", "%d")
15276 .replace("HH:mm:ss.SSSSSS", "%T")
15277 .replace("HH:mm:ss", "%T")
15278 .replace("HH", "%H")
15279 .replace("mm", "%i")
15280 .replace("ss", "%S");
15281 let date_format_expr = Expression::Function(
15282 Box::new(Function::new(
15283 "DATE_FORMAT".to_string(),
15284 vec![
15285 value_expr,
15286 Expression::string(&mysql_fmt),
15287 ],
15288 )),
15289 );
15290 // MySQL uses CHAR for string casts
15291 let mysql_dt = match &dt {
15292 DataType::VarChar { length, .. } => {
15293 DataType::Char { length: *length }
15294 }
15295 _ => dt,
15296 };
15297 Ok(Expression::Cast(Box::new(
15298 crate::expressions::Cast {
15299 this: date_format_expr,
15300 to: mysql_dt,
15301 trailing_comments: Vec::new(),
15302 double_colon_syntax: false,
15303 format: None,
15304 default: None,
15305 inferred_type: None,
15306 },
15307 )))
15308 }
15309 DialectType::Hive => {
15310 let func_name = "TO_TIMESTAMP";
15311 Ok(Expression::Function(Box::new(
15312 Function::new(
15313 func_name.to_string(),
15314 vec![
15315 value_expr,
15316 Expression::string(java_fmt),
15317 ],
15318 ),
15319 )))
15320 }
15321 _ => Ok(Expression::Cast(Box::new(
15322 crate::expressions::Cast {
15323 this: value_expr,
15324 to: dt,
15325 trailing_comments: Vec::new(),
15326 double_colon_syntax: false,
15327 format: None,
15328 default: None,
15329 inferred_type: None,
15330 },
15331 ))),
15332 }
15333 } else {
15334 // Unknown style, just CAST
15335 let cast_expr = if is_try {
15336 Expression::TryCast(Box::new(
15337 crate::expressions::Cast {
15338 this: value_expr,
15339 to: dt,
15340 trailing_comments: Vec::new(),
15341 double_colon_syntax: false,
15342 format: None,
15343 default: None,
15344 inferred_type: None,
15345 },
15346 ))
15347 } else {
15348 Expression::Cast(Box::new(
15349 crate::expressions::Cast {
15350 this: value_expr,
15351 to: dt,
15352 trailing_comments: Vec::new(),
15353 double_colon_syntax: false,
15354 format: None,
15355 default: None,
15356 inferred_type: None,
15357 },
15358 ))
15359 };
15360 Ok(cast_expr)
15361 }
15362 } else {
15363 // No style - simple CAST
15364 let final_dt = if matches!(
15365 target,
15366 DialectType::MySQL | DialectType::SingleStore
15367 ) {
15368 match &dt {
15369 DataType::Int { .. }
15370 | DataType::BigInt { .. }
15371 | DataType::SmallInt { .. }
15372 | DataType::TinyInt { .. } => DataType::Custom {
15373 name: "SIGNED".to_string(),
15374 },
15375 DataType::VarChar { length, .. } => {
15376 DataType::Char { length: *length }
15377 }
15378 _ => dt,
15379 }
15380 } else {
15381 dt
15382 };
15383 let cast_expr = if is_try {
15384 Expression::TryCast(Box::new(
15385 crate::expressions::Cast {
15386 this: value_expr,
15387 to: final_dt,
15388 trailing_comments: Vec::new(),
15389 double_colon_syntax: false,
15390 format: None,
15391 default: None,
15392 inferred_type: None,
15393 },
15394 ))
15395 } else {
15396 Expression::Cast(Box::new(crate::expressions::Cast {
15397 this: value_expr,
15398 to: final_dt,
15399 trailing_comments: Vec::new(),
15400 double_colon_syntax: false,
15401 format: None,
15402 default: None,
15403 inferred_type: None,
15404 }))
15405 };
15406 Ok(cast_expr)
15407 }
15408 } else {
15409 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
15410 Ok(Expression::Function(f))
15411 }
15412 }
15413 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
15414 "STRFTIME" if f.args.len() == 2 => {
15415 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
15416 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
15417 // SQLite: args[0] = format, args[1] = value
15418 (f.args[1].clone(), &f.args[0])
15419 } else {
15420 // DuckDB and others: args[0] = value, args[1] = format
15421 (f.args[0].clone(), &f.args[1])
15422 };
15423
// Helper: rewrite a C/strftime-style format string as a Java-style date pattern.
//
// Each (directive, replacement) pair below is applied over the whole string,
// one after another, in exactly the order listed. Directives that are not in
// the table are passed through unchanged.
fn c_to_java_format(fmt: &str) -> String {
    const DIRECTIVES: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    DIRECTIVES
        .iter()
        .fold(fmt.to_string(), |pattern, &(directive, java)| {
            pattern.replace(directive, java)
        })
}
15446
15447 // Helper: recursively convert format strings within expressions (handles CONCAT)
15448 fn convert_fmt_expr(
15449 expr: &Expression,
15450 converter: &dyn Fn(&str) -> String,
15451 ) -> Expression {
15452 match expr {
15453 Expression::Literal(lit)
15454 if matches!(
15455 lit.as_ref(),
15456 crate::expressions::Literal::String(_)
15457 ) =>
15458 {
15459 let crate::expressions::Literal::String(s) =
15460 lit.as_ref()
15461 else {
15462 unreachable!()
15463 };
15464 Expression::string(&converter(s))
15465 }
15466 Expression::Function(func)
15467 if func.name.eq_ignore_ascii_case("CONCAT") =>
15468 {
15469 let new_args: Vec<Expression> = func
15470 .args
15471 .iter()
15472 .map(|a| convert_fmt_expr(a, converter))
15473 .collect();
15474 Expression::Function(Box::new(Function::new(
15475 "CONCAT".to_string(),
15476 new_args,
15477 )))
15478 }
15479 other => other.clone(),
15480 }
15481 }
15482
15483 match target {
15484 DialectType::DuckDB => {
15485 if matches!(source, DialectType::SQLite) {
15486 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
15487 let cast_val = Expression::Cast(Box::new(Cast {
15488 this: val,
15489 to: crate::expressions::DataType::Timestamp {
15490 precision: None,
15491 timezone: false,
15492 },
15493 trailing_comments: Vec::new(),
15494 double_colon_syntax: false,
15495 format: None,
15496 default: None,
15497 inferred_type: None,
15498 }));
15499 Ok(Expression::Function(Box::new(Function::new(
15500 "STRFTIME".to_string(),
15501 vec![cast_val, fmt_expr.clone()],
15502 ))))
15503 } else {
15504 Ok(Expression::Function(f))
15505 }
15506 }
15507 DialectType::Spark
15508 | DialectType::Databricks
15509 | DialectType::Hive => {
15510 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
15511 let converted_fmt =
15512 convert_fmt_expr(fmt_expr, &c_to_java_format);
15513 Ok(Expression::Function(Box::new(Function::new(
15514 "DATE_FORMAT".to_string(),
15515 vec![val, converted_fmt],
15516 ))))
15517 }
15518 DialectType::TSQL | DialectType::Fabric => {
15519 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
15520 let converted_fmt =
15521 convert_fmt_expr(fmt_expr, &c_to_java_format);
15522 Ok(Expression::Function(Box::new(Function::new(
15523 "FORMAT".to_string(),
15524 vec![val, converted_fmt],
15525 ))))
15526 }
15527 DialectType::Presto
15528 | DialectType::Trino
15529 | DialectType::Athena => {
15530 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
15531 if let Expression::Literal(lit) = fmt_expr {
15532 if let crate::expressions::Literal::String(s) =
15533 lit.as_ref()
15534 {
15535 let presto_fmt = duckdb_to_presto_format(s);
15536 Ok(Expression::Function(Box::new(Function::new(
15537 "DATE_FORMAT".to_string(),
15538 vec![val, Expression::string(&presto_fmt)],
15539 ))))
15540 } else {
15541 Ok(Expression::Function(Box::new(Function::new(
15542 "DATE_FORMAT".to_string(),
15543 vec![val, fmt_expr.clone()],
15544 ))))
15545 }
15546 } else {
15547 Ok(Expression::Function(Box::new(Function::new(
15548 "DATE_FORMAT".to_string(),
15549 vec![val, fmt_expr.clone()],
15550 ))))
15551 }
15552 }
15553 DialectType::BigQuery => {
15554 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
15555 if let Expression::Literal(lit) = fmt_expr {
15556 if let crate::expressions::Literal::String(s) =
15557 lit.as_ref()
15558 {
15559 let bq_fmt = duckdb_to_bigquery_format(s);
15560 Ok(Expression::Function(Box::new(Function::new(
15561 "FORMAT_DATE".to_string(),
15562 vec![Expression::string(&bq_fmt), val],
15563 ))))
15564 } else {
15565 Ok(Expression::Function(Box::new(Function::new(
15566 "FORMAT_DATE".to_string(),
15567 vec![fmt_expr.clone(), val],
15568 ))))
15569 }
15570 } else {
15571 Ok(Expression::Function(Box::new(Function::new(
15572 "FORMAT_DATE".to_string(),
15573 vec![fmt_expr.clone(), val],
15574 ))))
15575 }
15576 }
15577 DialectType::PostgreSQL | DialectType::Redshift => {
15578 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
15579 if let Expression::Literal(lit) = fmt_expr {
15580 if let crate::expressions::Literal::String(s) =
15581 lit.as_ref()
15582 {
15583 let pg_fmt = s
15584 .replace("%Y", "YYYY")
15585 .replace("%m", "MM")
15586 .replace("%d", "DD")
15587 .replace("%H", "HH24")
15588 .replace("%M", "MI")
15589 .replace("%S", "SS")
15590 .replace("%y", "YY")
15591 .replace("%-m", "FMMM")
15592 .replace("%-d", "FMDD")
15593 .replace("%-H", "FMHH24")
15594 .replace("%-I", "FMHH12")
15595 .replace("%p", "AM")
15596 .replace("%F", "YYYY-MM-DD")
15597 .replace("%T", "HH24:MI:SS");
15598 Ok(Expression::Function(Box::new(Function::new(
15599 "TO_CHAR".to_string(),
15600 vec![val, Expression::string(&pg_fmt)],
15601 ))))
15602 } else {
15603 Ok(Expression::Function(Box::new(Function::new(
15604 "TO_CHAR".to_string(),
15605 vec![val, fmt_expr.clone()],
15606 ))))
15607 }
15608 } else {
15609 Ok(Expression::Function(Box::new(Function::new(
15610 "TO_CHAR".to_string(),
15611 vec![val, fmt_expr.clone()],
15612 ))))
15613 }
15614 }
15615 _ => Ok(Expression::Function(f)),
15616 }
15617 }
15618 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
15619 "STRPTIME" if f.args.len() == 2 => {
15620 let val = f.args[0].clone();
15621 let fmt_expr = &f.args[1];
15622
// Rewrite a C/strptime-style format string as a Java-style date pattern for
// parsing. The substitutions run sequentially over the whole string in the
// order listed; directives not present in the table are left as they are.
fn c_to_java_format_parse(fmt: &str) -> String {
    let substitutions: [(&str, &str); 16] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    let mut pattern = String::from(fmt);
    for (directive, java) in substitutions {
        pattern = pattern.replace(directive, java);
    }
    pattern
}
15641
15642 match target {
15643 DialectType::DuckDB => Ok(Expression::Function(f)),
15644 DialectType::Spark | DialectType::Databricks => {
15645 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
15646 if let Expression::Literal(lit) = fmt_expr {
15647 if let crate::expressions::Literal::String(s) =
15648 lit.as_ref()
15649 {
15650 let java_fmt = c_to_java_format_parse(s);
15651 Ok(Expression::Function(Box::new(Function::new(
15652 "TO_TIMESTAMP".to_string(),
15653 vec![val, Expression::string(&java_fmt)],
15654 ))))
15655 } else {
15656 Ok(Expression::Function(Box::new(Function::new(
15657 "TO_TIMESTAMP".to_string(),
15658 vec![val, fmt_expr.clone()],
15659 ))))
15660 }
15661 } else {
15662 Ok(Expression::Function(Box::new(Function::new(
15663 "TO_TIMESTAMP".to_string(),
15664 vec![val, fmt_expr.clone()],
15665 ))))
15666 }
15667 }
15668 DialectType::Hive => {
15669 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
15670 if let Expression::Literal(lit) = fmt_expr {
15671 if let crate::expressions::Literal::String(s) =
15672 lit.as_ref()
15673 {
15674 let java_fmt = c_to_java_format_parse(s);
15675 let unix_ts =
15676 Expression::Function(Box::new(Function::new(
15677 "UNIX_TIMESTAMP".to_string(),
15678 vec![val, Expression::string(&java_fmt)],
15679 )));
15680 let from_unix =
15681 Expression::Function(Box::new(Function::new(
15682 "FROM_UNIXTIME".to_string(),
15683 vec![unix_ts],
15684 )));
15685 Ok(Expression::Cast(Box::new(
15686 crate::expressions::Cast {
15687 this: from_unix,
15688 to: DataType::Timestamp {
15689 timezone: false,
15690 precision: None,
15691 },
15692 trailing_comments: Vec::new(),
15693 double_colon_syntax: false,
15694 format: None,
15695 default: None,
15696 inferred_type: None,
15697 },
15698 )))
15699 } else {
15700 Ok(Expression::Function(f))
15701 }
15702 } else {
15703 Ok(Expression::Function(f))
15704 }
15705 }
15706 DialectType::Presto
15707 | DialectType::Trino
15708 | DialectType::Athena => {
15709 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
15710 if let Expression::Literal(lit) = fmt_expr {
15711 if let crate::expressions::Literal::String(s) =
15712 lit.as_ref()
15713 {
15714 let presto_fmt = duckdb_to_presto_format(s);
15715 Ok(Expression::Function(Box::new(Function::new(
15716 "DATE_PARSE".to_string(),
15717 vec![val, Expression::string(&presto_fmt)],
15718 ))))
15719 } else {
15720 Ok(Expression::Function(Box::new(Function::new(
15721 "DATE_PARSE".to_string(),
15722 vec![val, fmt_expr.clone()],
15723 ))))
15724 }
15725 } else {
15726 Ok(Expression::Function(Box::new(Function::new(
15727 "DATE_PARSE".to_string(),
15728 vec![val, fmt_expr.clone()],
15729 ))))
15730 }
15731 }
15732 DialectType::BigQuery => {
15733 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
15734 if let Expression::Literal(lit) = fmt_expr {
15735 if let crate::expressions::Literal::String(s) =
15736 lit.as_ref()
15737 {
15738 let bq_fmt = duckdb_to_bigquery_format(s);
15739 Ok(Expression::Function(Box::new(Function::new(
15740 "PARSE_TIMESTAMP".to_string(),
15741 vec![Expression::string(&bq_fmt), val],
15742 ))))
15743 } else {
15744 Ok(Expression::Function(Box::new(Function::new(
15745 "PARSE_TIMESTAMP".to_string(),
15746 vec![fmt_expr.clone(), val],
15747 ))))
15748 }
15749 } else {
15750 Ok(Expression::Function(Box::new(Function::new(
15751 "PARSE_TIMESTAMP".to_string(),
15752 vec![fmt_expr.clone(), val],
15753 ))))
15754 }
15755 }
15756 _ => Ok(Expression::Function(f)),
15757 }
15758 }
15759 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
15760 "DATE_FORMAT"
15761 if f.args.len() >= 2
15762 && matches!(
15763 source,
15764 DialectType::Presto
15765 | DialectType::Trino
15766 | DialectType::Athena
15767 ) =>
15768 {
15769 let val = f.args[0].clone();
15770 let fmt_expr = &f.args[1];
15771
15772 match target {
15773 DialectType::Presto
15774 | DialectType::Trino
15775 | DialectType::Athena => {
15776 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
15777 if let Expression::Literal(lit) = fmt_expr {
15778 if let crate::expressions::Literal::String(s) =
15779 lit.as_ref()
15780 {
15781 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15782 Ok(Expression::Function(Box::new(Function::new(
15783 "DATE_FORMAT".to_string(),
15784 vec![val, Expression::string(&normalized)],
15785 ))))
15786 } else {
15787 Ok(Expression::Function(f))
15788 }
15789 } else {
15790 Ok(Expression::Function(f))
15791 }
15792 }
15793 DialectType::Hive
15794 | DialectType::Spark
15795 | DialectType::Databricks => {
15796 // Convert Presto C-style to Java-style format
15797 if let Expression::Literal(lit) = fmt_expr {
15798 if let crate::expressions::Literal::String(s) =
15799 lit.as_ref()
15800 {
15801 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15802 Ok(Expression::Function(Box::new(Function::new(
15803 "DATE_FORMAT".to_string(),
15804 vec![val, Expression::string(&java_fmt)],
15805 ))))
15806 } else {
15807 Ok(Expression::Function(f))
15808 }
15809 } else {
15810 Ok(Expression::Function(f))
15811 }
15812 }
15813 DialectType::DuckDB => {
15814 // Convert to STRFTIME(val, duckdb_fmt)
15815 if let Expression::Literal(lit) = fmt_expr {
15816 if let crate::expressions::Literal::String(s) =
15817 lit.as_ref()
15818 {
15819 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15820 Ok(Expression::Function(Box::new(Function::new(
15821 "STRFTIME".to_string(),
15822 vec![val, Expression::string(&duckdb_fmt)],
15823 ))))
15824 } else {
15825 Ok(Expression::Function(Box::new(Function::new(
15826 "STRFTIME".to_string(),
15827 vec![val, fmt_expr.clone()],
15828 ))))
15829 }
15830 } else {
15831 Ok(Expression::Function(Box::new(Function::new(
15832 "STRFTIME".to_string(),
15833 vec![val, fmt_expr.clone()],
15834 ))))
15835 }
15836 }
15837 DialectType::BigQuery => {
15838 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
15839 if let Expression::Literal(lit) = fmt_expr {
15840 if let crate::expressions::Literal::String(s) =
15841 lit.as_ref()
15842 {
15843 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
15844 Ok(Expression::Function(Box::new(Function::new(
15845 "FORMAT_DATE".to_string(),
15846 vec![Expression::string(&bq_fmt), val],
15847 ))))
15848 } else {
15849 Ok(Expression::Function(Box::new(Function::new(
15850 "FORMAT_DATE".to_string(),
15851 vec![fmt_expr.clone(), val],
15852 ))))
15853 }
15854 } else {
15855 Ok(Expression::Function(Box::new(Function::new(
15856 "FORMAT_DATE".to_string(),
15857 vec![fmt_expr.clone(), val],
15858 ))))
15859 }
15860 }
15861 _ => Ok(Expression::Function(f)),
15862 }
15863 }
15864 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
15865 "DATE_PARSE"
15866 if f.args.len() >= 2
15867 && matches!(
15868 source,
15869 DialectType::Presto
15870 | DialectType::Trino
15871 | DialectType::Athena
15872 ) =>
15873 {
15874 let val = f.args[0].clone();
15875 let fmt_expr = &f.args[1];
15876
15877 match target {
15878 DialectType::Presto
15879 | DialectType::Trino
15880 | DialectType::Athena => {
15881 // Presto -> Presto: normalize format
15882 if let Expression::Literal(lit) = fmt_expr {
15883 if let crate::expressions::Literal::String(s) =
15884 lit.as_ref()
15885 {
15886 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15887 Ok(Expression::Function(Box::new(Function::new(
15888 "DATE_PARSE".to_string(),
15889 vec![val, Expression::string(&normalized)],
15890 ))))
15891 } else {
15892 Ok(Expression::Function(f))
15893 }
15894 } else {
15895 Ok(Expression::Function(f))
15896 }
15897 }
15898 DialectType::Hive => {
15899 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
15900 if let Expression::Literal(lit) = fmt_expr {
15901 if let crate::expressions::Literal::String(s) =
15902 lit.as_ref()
15903 {
15904 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
15905 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
15906 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15907 this: val,
15908 to: DataType::Timestamp { timezone: false, precision: None },
15909 trailing_comments: Vec::new(),
15910 double_colon_syntax: false,
15911 format: None,
15912 default: None,
15913 inferred_type: None,
15914 })))
15915 } else {
15916 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15917 Ok(Expression::Function(Box::new(Function::new(
15918 "TO_TIMESTAMP".to_string(),
15919 vec![val, Expression::string(&java_fmt)],
15920 ))))
15921 }
15922 } else {
15923 Ok(Expression::Function(f))
15924 }
15925 } else {
15926 Ok(Expression::Function(f))
15927 }
15928 }
15929 DialectType::Spark | DialectType::Databricks => {
15930 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
15931 if let Expression::Literal(lit) = fmt_expr {
15932 if let crate::expressions::Literal::String(s) =
15933 lit.as_ref()
15934 {
15935 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15936 Ok(Expression::Function(Box::new(Function::new(
15937 "TO_TIMESTAMP".to_string(),
15938 vec![val, Expression::string(&java_fmt)],
15939 ))))
15940 } else {
15941 Ok(Expression::Function(f))
15942 }
15943 } else {
15944 Ok(Expression::Function(f))
15945 }
15946 }
15947 DialectType::DuckDB => {
15948 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
15949 if let Expression::Literal(lit) = fmt_expr {
15950 if let crate::expressions::Literal::String(s) =
15951 lit.as_ref()
15952 {
15953 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15954 Ok(Expression::Function(Box::new(Function::new(
15955 "STRPTIME".to_string(),
15956 vec![val, Expression::string(&duckdb_fmt)],
15957 ))))
15958 } else {
15959 Ok(Expression::Function(Box::new(Function::new(
15960 "STRPTIME".to_string(),
15961 vec![val, fmt_expr.clone()],
15962 ))))
15963 }
15964 } else {
15965 Ok(Expression::Function(Box::new(Function::new(
15966 "STRPTIME".to_string(),
15967 vec![val, fmt_expr.clone()],
15968 ))))
15969 }
15970 }
15971 _ => Ok(Expression::Function(f)),
15972 }
15973 }
15974 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
15975 "FROM_BASE64"
15976 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15977 {
15978 Ok(Expression::Function(Box::new(Function::new(
15979 "UNBASE64".to_string(),
15980 f.args,
15981 ))))
15982 }
15983 "TO_BASE64"
15984 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15985 {
15986 Ok(Expression::Function(Box::new(Function::new(
15987 "BASE64".to_string(),
15988 f.args,
15989 ))))
15990 }
15991 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
15992 "FROM_UNIXTIME"
15993 if f.args.len() == 1
15994 && matches!(
15995 source,
15996 DialectType::Presto
15997 | DialectType::Trino
15998 | DialectType::Athena
15999 )
16000 && matches!(
16001 target,
16002 DialectType::Spark | DialectType::Databricks
16003 ) =>
16004 {
16005 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
16006 let from_unix = Expression::Function(Box::new(Function::new(
16007 "FROM_UNIXTIME".to_string(),
16008 f.args,
16009 )));
16010 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
16011 this: from_unix,
16012 to: DataType::Timestamp {
16013 timezone: false,
16014 precision: None,
16015 },
16016 trailing_comments: Vec::new(),
16017 double_colon_syntax: false,
16018 format: None,
16019 default: None,
16020 inferred_type: None,
16021 })))
16022 }
16023 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
16024 "DATE_FORMAT"
16025 if f.args.len() >= 2
16026 && !matches!(
16027 target,
16028 DialectType::Hive
16029 | DialectType::Spark
16030 | DialectType::Databricks
16031 | DialectType::MySQL
16032 | DialectType::SingleStore
16033 ) =>
16034 {
16035 let val = f.args[0].clone();
16036 let fmt_expr = &f.args[1];
16037 let is_hive_source = matches!(
16038 source,
16039 DialectType::Hive
16040 | DialectType::Spark
16041 | DialectType::Databricks
16042 );
16043
// Convert a Java-style date pattern into C strftime directives.
//
// Works in two passes because the single-letter timezone patterns would
// otherwise collide with letters produced by the first pass.
fn java_to_c_format(fmt: &str) -> String {
    // Pass 1: multi-character patterns, applied sequentially in this order
    // (longer patterns such as "yyyy" must run before shorter ones like "yy").
    const MULTI_CHAR: &[(&str, &str)] = &[
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let first_pass = MULTI_CHAR
        .iter()
        .fold(fmt.to_string(), |text, &(java, c)| text.replace(java, c));

    // Pass 2: single-character timezone patterns.
    // z -> %Z (timezone name), Z -> %z (timezone offset).
    // A '%' together with the character after it is an existing directive and
    // is copied verbatim, so the 'z'/'Z' inside "%z"/"%Z" is never rewritten.
    let mut converted = String::with_capacity(first_pass.len());
    let mut rest = first_pass.chars();
    while let Some(ch) = rest.next() {
        match ch {
            '%' => {
                converted.push('%');
                // A trailing lone '%' has nothing after it; it is emitted as-is.
                if let Some(directive) = rest.next() {
                    converted.push(directive);
                }
            }
            'z' => converted.push_str("%Z"),
            'Z' => converted.push_str("%z"),
            other => converted.push(other),
        }
    }
    converted
}
16083
16084 fn java_to_presto_format(fmt: &str) -> String {
16085 // Presto uses %T for HH:MM:SS
16086 let c_fmt = java_to_c_format(fmt);
16087 c_fmt.replace("%H:%M:%S", "%T")
16088 }
16089
16090 fn java_to_bq_format(fmt: &str) -> String {
16091 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
16092 let c_fmt = java_to_c_format(fmt);
16093 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
16094 }
16095
16096 // For Hive source, CAST string literals to appropriate type
16097 let cast_val = if is_hive_source {
16098 match &val {
16099 Expression::Literal(lit)
16100 if matches!(
16101 lit.as_ref(),
16102 crate::expressions::Literal::String(_)
16103 ) =>
16104 {
16105 match target {
16106 DialectType::DuckDB
16107 | DialectType::Presto
16108 | DialectType::Trino
16109 | DialectType::Athena => {
16110 Self::ensure_cast_timestamp(val.clone())
16111 }
16112 DialectType::BigQuery => {
16113 // BigQuery: CAST(val AS DATETIME)
16114 Expression::Cast(Box::new(
16115 crate::expressions::Cast {
16116 this: val.clone(),
16117 to: DataType::Custom {
16118 name: "DATETIME".to_string(),
16119 },
16120 trailing_comments: vec![],
16121 double_colon_syntax: false,
16122 format: None,
16123 default: None,
16124 inferred_type: None,
16125 },
16126 ))
16127 }
16128 _ => val.clone(),
16129 }
16130 }
16131 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
16132 Expression::Cast(c)
16133 if matches!(c.to, DataType::Date)
16134 && matches!(
16135 target,
16136 DialectType::Presto
16137 | DialectType::Trino
16138 | DialectType::Athena
16139 ) =>
16140 {
16141 Expression::Cast(Box::new(crate::expressions::Cast {
16142 this: val.clone(),
16143 to: DataType::Timestamp {
16144 timezone: false,
16145 precision: None,
16146 },
16147 trailing_comments: vec![],
16148 double_colon_syntax: false,
16149 format: None,
16150 default: None,
16151 inferred_type: None,
16152 }))
16153 }
16154 Expression::Literal(lit)
16155 if matches!(
16156 lit.as_ref(),
16157 crate::expressions::Literal::Date(_)
16158 ) && matches!(
16159 target,
16160 DialectType::Presto
16161 | DialectType::Trino
16162 | DialectType::Athena
16163 ) =>
16164 {
16165 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
16166 let cast_date = Self::date_literal_to_cast(val.clone());
16167 Expression::Cast(Box::new(crate::expressions::Cast {
16168 this: cast_date,
16169 to: DataType::Timestamp {
16170 timezone: false,
16171 precision: None,
16172 },
16173 trailing_comments: vec![],
16174 double_colon_syntax: false,
16175 format: None,
16176 default: None,
16177 inferred_type: None,
16178 }))
16179 }
16180 _ => val.clone(),
16181 }
16182 } else {
16183 val.clone()
16184 };
16185
16186 match target {
16187 DialectType::DuckDB => {
16188 if let Expression::Literal(lit) = fmt_expr {
16189 if let crate::expressions::Literal::String(s) =
16190 lit.as_ref()
16191 {
16192 let c_fmt = if is_hive_source {
16193 java_to_c_format(s)
16194 } else {
16195 s.clone()
16196 };
16197 Ok(Expression::Function(Box::new(Function::new(
16198 "STRFTIME".to_string(),
16199 vec![cast_val, Expression::string(&c_fmt)],
16200 ))))
16201 } else {
16202 Ok(Expression::Function(Box::new(Function::new(
16203 "STRFTIME".to_string(),
16204 vec![cast_val, fmt_expr.clone()],
16205 ))))
16206 }
16207 } else {
16208 Ok(Expression::Function(Box::new(Function::new(
16209 "STRFTIME".to_string(),
16210 vec![cast_val, fmt_expr.clone()],
16211 ))))
16212 }
16213 }
16214 DialectType::Presto
16215 | DialectType::Trino
16216 | DialectType::Athena => {
16217 if is_hive_source {
16218 if let Expression::Literal(lit) = fmt_expr {
16219 if let crate::expressions::Literal::String(s) =
16220 lit.as_ref()
16221 {
16222 let p_fmt = java_to_presto_format(s);
16223 Ok(Expression::Function(Box::new(
16224 Function::new(
16225 "DATE_FORMAT".to_string(),
16226 vec![
16227 cast_val,
16228 Expression::string(&p_fmt),
16229 ],
16230 ),
16231 )))
16232 } else {
16233 Ok(Expression::Function(Box::new(
16234 Function::new(
16235 "DATE_FORMAT".to_string(),
16236 vec![cast_val, fmt_expr.clone()],
16237 ),
16238 )))
16239 }
16240 } else {
16241 Ok(Expression::Function(Box::new(Function::new(
16242 "DATE_FORMAT".to_string(),
16243 vec![cast_val, fmt_expr.clone()],
16244 ))))
16245 }
16246 } else {
16247 Ok(Expression::Function(Box::new(Function::new(
16248 "DATE_FORMAT".to_string(),
16249 f.args,
16250 ))))
16251 }
16252 }
16253 DialectType::BigQuery => {
16254 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
16255 if let Expression::Literal(lit) = fmt_expr {
16256 if let crate::expressions::Literal::String(s) =
16257 lit.as_ref()
16258 {
16259 let bq_fmt = if is_hive_source {
16260 java_to_bq_format(s)
16261 } else {
16262 java_to_c_format(s)
16263 };
16264 Ok(Expression::Function(Box::new(Function::new(
16265 "FORMAT_DATE".to_string(),
16266 vec![Expression::string(&bq_fmt), cast_val],
16267 ))))
16268 } else {
16269 Ok(Expression::Function(Box::new(Function::new(
16270 "FORMAT_DATE".to_string(),
16271 vec![fmt_expr.clone(), cast_val],
16272 ))))
16273 }
16274 } else {
16275 Ok(Expression::Function(Box::new(Function::new(
16276 "FORMAT_DATE".to_string(),
16277 vec![fmt_expr.clone(), cast_val],
16278 ))))
16279 }
16280 }
16281 DialectType::PostgreSQL | DialectType::Redshift => {
16282 if let Expression::Literal(lit) = fmt_expr {
16283 if let crate::expressions::Literal::String(s) =
16284 lit.as_ref()
16285 {
16286 let pg_fmt = s
16287 .replace("yyyy", "YYYY")
16288 .replace("MM", "MM")
16289 .replace("dd", "DD")
16290 .replace("HH", "HH24")
16291 .replace("mm", "MI")
16292 .replace("ss", "SS")
16293 .replace("yy", "YY");
16294 Ok(Expression::Function(Box::new(Function::new(
16295 "TO_CHAR".to_string(),
16296 vec![val, Expression::string(&pg_fmt)],
16297 ))))
16298 } else {
16299 Ok(Expression::Function(Box::new(Function::new(
16300 "TO_CHAR".to_string(),
16301 vec![val, fmt_expr.clone()],
16302 ))))
16303 }
16304 } else {
16305 Ok(Expression::Function(Box::new(Function::new(
16306 "TO_CHAR".to_string(),
16307 vec![val, fmt_expr.clone()],
16308 ))))
16309 }
16310 }
16311 _ => Ok(Expression::Function(f)),
16312 }
16313 }
16314 // DATEDIFF(unit, start, end) - 3-arg form
16315 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
16316 "DATEDIFF" if f.args.len() == 3 => {
16317 let mut args = f.args;
16318 // SQLite source: args = (date1, date2, unit_string)
16319 // Standard source: args = (unit, start, end)
16320 let (_arg0, arg1, arg2, unit_str) =
16321 if matches!(source, DialectType::SQLite) {
16322 let date1 = args.remove(0);
16323 let date2 = args.remove(0);
16324 let unit_expr = args.remove(0);
16325 let unit_s = Self::get_unit_str_static(&unit_expr);
16326
16327 // For SQLite target, generate JULIANDAY arithmetic directly
16328 if matches!(target, DialectType::SQLite) {
16329 let jd_first = Expression::Function(Box::new(
16330 Function::new("JULIANDAY".to_string(), vec![date1]),
16331 ));
16332 let jd_second = Expression::Function(Box::new(
16333 Function::new("JULIANDAY".to_string(), vec![date2]),
16334 ));
16335 let diff = Expression::Sub(Box::new(
16336 crate::expressions::BinaryOp::new(
16337 jd_first, jd_second,
16338 ),
16339 ));
16340 let paren_diff = Expression::Paren(Box::new(
16341 crate::expressions::Paren {
16342 this: diff,
16343 trailing_comments: Vec::new(),
16344 },
16345 ));
16346 let adjusted = match unit_s.as_str() {
16347 "HOUR" => Expression::Mul(Box::new(
16348 crate::expressions::BinaryOp::new(
16349 paren_diff,
16350 Expression::Literal(Box::new(
16351 Literal::Number("24.0".to_string()),
16352 )),
16353 ),
16354 )),
16355 "MINUTE" => Expression::Mul(Box::new(
16356 crate::expressions::BinaryOp::new(
16357 paren_diff,
16358 Expression::Literal(Box::new(
16359 Literal::Number("1440.0".to_string()),
16360 )),
16361 ),
16362 )),
16363 "SECOND" => Expression::Mul(Box::new(
16364 crate::expressions::BinaryOp::new(
16365 paren_diff,
16366 Expression::Literal(Box::new(
16367 Literal::Number("86400.0".to_string()),
16368 )),
16369 ),
16370 )),
16371 "MONTH" => Expression::Div(Box::new(
16372 crate::expressions::BinaryOp::new(
16373 paren_diff,
16374 Expression::Literal(Box::new(
16375 Literal::Number("30.0".to_string()),
16376 )),
16377 ),
16378 )),
16379 "YEAR" => Expression::Div(Box::new(
16380 crate::expressions::BinaryOp::new(
16381 paren_diff,
16382 Expression::Literal(Box::new(
16383 Literal::Number("365.0".to_string()),
16384 )),
16385 ),
16386 )),
16387 _ => paren_diff,
16388 };
16389 return Ok(Expression::Cast(Box::new(Cast {
16390 this: adjusted,
16391 to: DataType::Int {
16392 length: None,
16393 integer_spelling: true,
16394 },
16395 trailing_comments: vec![],
16396 double_colon_syntax: false,
16397 format: None,
16398 default: None,
16399 inferred_type: None,
16400 })));
16401 }
16402
16403 // For other targets, remap to standard (unit, start, end) form
16404 let unit_ident =
16405 Expression::Identifier(Identifier::new(&unit_s));
16406 (unit_ident, date1, date2, unit_s)
16407 } else {
16408 let arg0 = args.remove(0);
16409 let arg1 = args.remove(0);
16410 let arg2 = args.remove(0);
16411 let unit_s = Self::get_unit_str_static(&arg0);
16412 (arg0, arg1, arg2, unit_s)
16413 };
16414
16415 // For Hive/Spark source, string literal dates need to be cast
16416 // Note: Databricks is excluded - it handles string args like standard SQL
16417 let is_hive_spark =
16418 matches!(source, DialectType::Hive | DialectType::Spark);
16419
16420 match target {
16421 DialectType::Snowflake => {
16422 let unit =
16423 Expression::Identifier(Identifier::new(&unit_str));
16424 // Use ensure_to_date_preserved to add TO_DATE with a marker
16425 // that prevents the Snowflake TO_DATE handler from converting it to CAST
16426 let d1 = if is_hive_spark {
16427 Self::ensure_to_date_preserved(arg1)
16428 } else {
16429 arg1
16430 };
16431 let d2 = if is_hive_spark {
16432 Self::ensure_to_date_preserved(arg2)
16433 } else {
16434 arg2
16435 };
16436 Ok(Expression::Function(Box::new(Function::new(
16437 "DATEDIFF".to_string(),
16438 vec![unit, d1, d2],
16439 ))))
16440 }
16441 DialectType::Redshift => {
16442 let unit =
16443 Expression::Identifier(Identifier::new(&unit_str));
16444 let d1 = if is_hive_spark {
16445 Self::ensure_cast_date(arg1)
16446 } else {
16447 arg1
16448 };
16449 let d2 = if is_hive_spark {
16450 Self::ensure_cast_date(arg2)
16451 } else {
16452 arg2
16453 };
16454 Ok(Expression::Function(Box::new(Function::new(
16455 "DATEDIFF".to_string(),
16456 vec![unit, d1, d2],
16457 ))))
16458 }
16459 DialectType::TSQL => {
16460 let unit =
16461 Expression::Identifier(Identifier::new(&unit_str));
16462 Ok(Expression::Function(Box::new(Function::new(
16463 "DATEDIFF".to_string(),
16464 vec![unit, arg1, arg2],
16465 ))))
16466 }
16467 DialectType::DuckDB => {
16468 let is_redshift_tsql = matches!(
16469 source,
16470 DialectType::Redshift | DialectType::TSQL
16471 );
16472 if is_hive_spark {
16473 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
16474 let d1 = Self::ensure_cast_date(arg1);
16475 let d2 = Self::ensure_cast_date(arg2);
16476 Ok(Expression::Function(Box::new(Function::new(
16477 "DATE_DIFF".to_string(),
16478 vec![Expression::string(&unit_str), d1, d2],
16479 ))))
16480 } else if matches!(source, DialectType::Snowflake) {
16481 // For Snowflake source: special handling per unit
16482 match unit_str.as_str() {
16483 "NANOSECOND" => {
16484 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
16485 fn cast_to_timestamp_ns(
16486 expr: Expression,
16487 ) -> Expression
16488 {
16489 Expression::Cast(Box::new(Cast {
16490 this: expr,
16491 to: DataType::Custom {
16492 name: "TIMESTAMP_NS".to_string(),
16493 },
16494 trailing_comments: vec![],
16495 double_colon_syntax: false,
16496 format: None,
16497 default: None,
16498 inferred_type: None,
16499 }))
16500 }
16501 let epoch_end = Expression::Function(Box::new(
16502 Function::new(
16503 "EPOCH_NS".to_string(),
16504 vec![cast_to_timestamp_ns(arg2)],
16505 ),
16506 ));
16507 let epoch_start = Expression::Function(
16508 Box::new(Function::new(
16509 "EPOCH_NS".to_string(),
16510 vec![cast_to_timestamp_ns(arg1)],
16511 )),
16512 );
16513 Ok(Expression::Sub(Box::new(BinaryOp::new(
16514 epoch_end,
16515 epoch_start,
16516 ))))
16517 }
16518 "WEEK" => {
16519 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
16520 let d1 = Self::force_cast_date(arg1);
16521 let d2 = Self::force_cast_date(arg2);
16522 let dt1 = Expression::Function(Box::new(
16523 Function::new(
16524 "DATE_TRUNC".to_string(),
16525 vec![Expression::string("WEEK"), d1],
16526 ),
16527 ));
16528 let dt2 = Expression::Function(Box::new(
16529 Function::new(
16530 "DATE_TRUNC".to_string(),
16531 vec![Expression::string("WEEK"), d2],
16532 ),
16533 ));
16534 Ok(Expression::Function(Box::new(
16535 Function::new(
16536 "DATE_DIFF".to_string(),
16537 vec![
16538 Expression::string(&unit_str),
16539 dt1,
16540 dt2,
16541 ],
16542 ),
16543 )))
16544 }
16545 _ => {
16546 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
16547 let d1 = Self::force_cast_date(arg1);
16548 let d2 = Self::force_cast_date(arg2);
16549 Ok(Expression::Function(Box::new(
16550 Function::new(
16551 "DATE_DIFF".to_string(),
16552 vec![
16553 Expression::string(&unit_str),
16554 d1,
16555 d2,
16556 ],
16557 ),
16558 )))
16559 }
16560 }
16561 } else if is_redshift_tsql {
16562 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
16563 let d1 = Self::force_cast_timestamp(arg1);
16564 let d2 = Self::force_cast_timestamp(arg2);
16565 Ok(Expression::Function(Box::new(Function::new(
16566 "DATE_DIFF".to_string(),
16567 vec![Expression::string(&unit_str), d1, d2],
16568 ))))
16569 } else {
16570 // Keep as DATEDIFF so DuckDB's transform_datediff handles
16571 // DATE_TRUNC for WEEK, CAST for string literals, etc.
16572 let unit =
16573 Expression::Identifier(Identifier::new(&unit_str));
16574 Ok(Expression::Function(Box::new(Function::new(
16575 "DATEDIFF".to_string(),
16576 vec![unit, arg1, arg2],
16577 ))))
16578 }
16579 }
16580 DialectType::BigQuery => {
16581 let is_redshift_tsql = matches!(
16582 source,
16583 DialectType::Redshift
16584 | DialectType::TSQL
16585 | DialectType::Snowflake
16586 );
16587 let cast_d1 = if is_hive_spark {
16588 Self::ensure_cast_date(arg1)
16589 } else if is_redshift_tsql {
16590 Self::force_cast_datetime(arg1)
16591 } else {
16592 Self::ensure_cast_datetime(arg1)
16593 };
16594 let cast_d2 = if is_hive_spark {
16595 Self::ensure_cast_date(arg2)
16596 } else if is_redshift_tsql {
16597 Self::force_cast_datetime(arg2)
16598 } else {
16599 Self::ensure_cast_datetime(arg2)
16600 };
16601 let unit =
16602 Expression::Identifier(Identifier::new(&unit_str));
16603 Ok(Expression::Function(Box::new(Function::new(
16604 "DATE_DIFF".to_string(),
16605 vec![cast_d2, cast_d1, unit],
16606 ))))
16607 }
16608 DialectType::Presto
16609 | DialectType::Trino
16610 | DialectType::Athena => {
16611 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
16612 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
16613 let is_redshift_tsql = matches!(
16614 source,
16615 DialectType::Redshift
16616 | DialectType::TSQL
16617 | DialectType::Snowflake
16618 );
16619 let d1 = if is_hive_spark {
16620 Self::double_cast_timestamp_date(arg1)
16621 } else if is_redshift_tsql {
16622 Self::force_cast_timestamp(arg1)
16623 } else {
16624 arg1
16625 };
16626 let d2 = if is_hive_spark {
16627 Self::double_cast_timestamp_date(arg2)
16628 } else if is_redshift_tsql {
16629 Self::force_cast_timestamp(arg2)
16630 } else {
16631 arg2
16632 };
16633 Ok(Expression::Function(Box::new(Function::new(
16634 "DATE_DIFF".to_string(),
16635 vec![Expression::string(&unit_str), d1, d2],
16636 ))))
16637 }
16638 DialectType::Hive => match unit_str.as_str() {
16639 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
16640 this: Expression::Function(Box::new(Function::new(
16641 "MONTHS_BETWEEN".to_string(),
16642 vec![arg2, arg1],
16643 ))),
16644 to: DataType::Int {
16645 length: None,
16646 integer_spelling: false,
16647 },
16648 trailing_comments: vec![],
16649 double_colon_syntax: false,
16650 format: None,
16651 default: None,
16652 inferred_type: None,
16653 }))),
16654 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
16655 this: Expression::Div(Box::new(
16656 crate::expressions::BinaryOp::new(
16657 Expression::Function(Box::new(Function::new(
16658 "DATEDIFF".to_string(),
16659 vec![arg2, arg1],
16660 ))),
16661 Expression::number(7),
16662 ),
16663 )),
16664 to: DataType::Int {
16665 length: None,
16666 integer_spelling: false,
16667 },
16668 trailing_comments: vec![],
16669 double_colon_syntax: false,
16670 format: None,
16671 default: None,
16672 inferred_type: None,
16673 }))),
16674 _ => Ok(Expression::Function(Box::new(Function::new(
16675 "DATEDIFF".to_string(),
16676 vec![arg2, arg1],
16677 )))),
16678 },
16679 DialectType::Spark | DialectType::Databricks => {
16680 let unit =
16681 Expression::Identifier(Identifier::new(&unit_str));
16682 Ok(Expression::Function(Box::new(Function::new(
16683 "DATEDIFF".to_string(),
16684 vec![unit, arg1, arg2],
16685 ))))
16686 }
16687 _ => {
16688 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
16689 let d1 = if is_hive_spark {
16690 Self::ensure_cast_date(arg1)
16691 } else {
16692 arg1
16693 };
16694 let d2 = if is_hive_spark {
16695 Self::ensure_cast_date(arg2)
16696 } else {
16697 arg2
16698 };
16699 let unit =
16700 Expression::Identifier(Identifier::new(&unit_str));
16701 Ok(Expression::Function(Box::new(Function::new(
16702 "DATEDIFF".to_string(),
16703 vec![unit, d1, d2],
16704 ))))
16705 }
16706 }
16707 }
16708 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
16709 "DATEDIFF" if f.args.len() == 2 => {
16710 let mut args = f.args;
16711 let arg0 = args.remove(0);
16712 let arg1 = args.remove(0);
16713
16714 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
16715 // Also recognizes TryCast/Cast to DATE that may have been produced by
16716 // cross-dialect TO_DATE -> TRY_CAST conversion
16717 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
16718 if let Expression::Function(ref f) = e {
16719 if f.name.eq_ignore_ascii_case("TO_DATE")
16720 && f.args.len() == 1
16721 {
16722 return (f.args[0].clone(), true);
16723 }
16724 }
16725 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
16726 if let Expression::TryCast(ref c) = e {
16727 if matches!(c.to, DataType::Date) {
16728 return (e, true); // Already properly cast, return as-is
16729 }
16730 }
16731 (e, false)
16732 };
16733
16734 match target {
16735 DialectType::DuckDB => {
16736 // For Hive source, always CAST to DATE
16737 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
16738 let cast_d0 = if matches!(
16739 source,
16740 DialectType::Hive
16741 | DialectType::Spark
16742 | DialectType::Databricks
16743 ) {
16744 let (inner, was_to_date) = unwrap_to_date(arg1);
16745 if was_to_date {
16746 // Already a date expression, use directly
16747 if matches!(&inner, Expression::TryCast(_)) {
16748 inner // Already TRY_CAST(x AS DATE)
16749 } else {
16750 Self::try_cast_date(inner)
16751 }
16752 } else {
16753 Self::force_cast_date(inner)
16754 }
16755 } else {
16756 Self::ensure_cast_date(arg1)
16757 };
16758 let cast_d1 = if matches!(
16759 source,
16760 DialectType::Hive
16761 | DialectType::Spark
16762 | DialectType::Databricks
16763 ) {
16764 let (inner, was_to_date) = unwrap_to_date(arg0);
16765 if was_to_date {
16766 if matches!(&inner, Expression::TryCast(_)) {
16767 inner
16768 } else {
16769 Self::try_cast_date(inner)
16770 }
16771 } else {
16772 Self::force_cast_date(inner)
16773 }
16774 } else {
16775 Self::ensure_cast_date(arg0)
16776 };
16777 Ok(Expression::Function(Box::new(Function::new(
16778 "DATE_DIFF".to_string(),
16779 vec![Expression::string("DAY"), cast_d0, cast_d1],
16780 ))))
16781 }
16782 DialectType::Presto
16783 | DialectType::Trino
16784 | DialectType::Athena => {
16785 // For Hive/Spark source, apply double_cast_timestamp_date
16786 // For other sources (MySQL etc.), just swap args without casting
16787 if matches!(
16788 source,
16789 DialectType::Hive
16790 | DialectType::Spark
16791 | DialectType::Databricks
16792 ) {
16793 let cast_fn = |e: Expression| -> Expression {
16794 let (inner, was_to_date) = unwrap_to_date(e);
16795 if was_to_date {
16796 let first_cast =
16797 Self::double_cast_timestamp_date(inner);
16798 Self::double_cast_timestamp_date(first_cast)
16799 } else {
16800 Self::double_cast_timestamp_date(inner)
16801 }
16802 };
16803 Ok(Expression::Function(Box::new(Function::new(
16804 "DATE_DIFF".to_string(),
16805 vec![
16806 Expression::string("DAY"),
16807 cast_fn(arg1),
16808 cast_fn(arg0),
16809 ],
16810 ))))
16811 } else {
16812 Ok(Expression::Function(Box::new(Function::new(
16813 "DATE_DIFF".to_string(),
16814 vec![Expression::string("DAY"), arg1, arg0],
16815 ))))
16816 }
16817 }
16818 DialectType::Redshift => {
16819 let unit = Expression::Identifier(Identifier::new("DAY"));
16820 Ok(Expression::Function(Box::new(Function::new(
16821 "DATEDIFF".to_string(),
16822 vec![unit, arg1, arg0],
16823 ))))
16824 }
16825 _ => Ok(Expression::Function(Box::new(Function::new(
16826 "DATEDIFF".to_string(),
16827 vec![arg0, arg1],
16828 )))),
16829 }
16830 }
16831 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
16832 "DATE_DIFF" if f.args.len() == 3 => {
16833 let mut args = f.args;
16834 let arg0 = args.remove(0);
16835 let arg1 = args.remove(0);
16836 let arg2 = args.remove(0);
16837 let unit_str = Self::get_unit_str_static(&arg0);
16838
16839 match target {
16840 DialectType::DuckDB => {
16841 // DuckDB: DATE_DIFF('UNIT', start, end)
16842 Ok(Expression::Function(Box::new(Function::new(
16843 "DATE_DIFF".to_string(),
16844 vec![Expression::string(&unit_str), arg1, arg2],
16845 ))))
16846 }
16847 DialectType::Presto
16848 | DialectType::Trino
16849 | DialectType::Athena => {
16850 Ok(Expression::Function(Box::new(Function::new(
16851 "DATE_DIFF".to_string(),
16852 vec![Expression::string(&unit_str), arg1, arg2],
16853 ))))
16854 }
16855 DialectType::ClickHouse => {
16856 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
16857 let unit =
16858 Expression::Identifier(Identifier::new(&unit_str));
16859 Ok(Expression::Function(Box::new(Function::new(
16860 "DATE_DIFF".to_string(),
16861 vec![unit, arg1, arg2],
16862 ))))
16863 }
16864 DialectType::Snowflake | DialectType::Redshift => {
16865 let unit =
16866 Expression::Identifier(Identifier::new(&unit_str));
16867 Ok(Expression::Function(Box::new(Function::new(
16868 "DATEDIFF".to_string(),
16869 vec![unit, arg1, arg2],
16870 ))))
16871 }
16872 _ => {
16873 let unit =
16874 Expression::Identifier(Identifier::new(&unit_str));
16875 Ok(Expression::Function(Box::new(Function::new(
16876 "DATEDIFF".to_string(),
16877 vec![unit, arg1, arg2],
16878 ))))
16879 }
16880 }
16881 }
16882 // DATEADD(unit, val, date) - 3-arg form
16883 "DATEADD" if f.args.len() == 3 => {
16884 let mut args = f.args;
16885 let arg0 = args.remove(0);
16886 let arg1 = args.remove(0);
16887 let arg2 = args.remove(0);
16888 let unit_str = Self::get_unit_str_static(&arg0);
16889
16890 // Normalize TSQL unit abbreviations to standard names
16891 let unit_str = match unit_str.as_str() {
16892 "YY" | "YYYY" => "YEAR".to_string(),
16893 "QQ" | "Q" => "QUARTER".to_string(),
16894 "MM" | "M" => "MONTH".to_string(),
16895 "WK" | "WW" => "WEEK".to_string(),
16896 "DD" | "D" | "DY" => "DAY".to_string(),
16897 "HH" => "HOUR".to_string(),
16898 "MI" | "N" => "MINUTE".to_string(),
16899 "SS" | "S" => "SECOND".to_string(),
16900 "MS" => "MILLISECOND".to_string(),
16901 "MCS" | "US" => "MICROSECOND".to_string(),
16902 _ => unit_str,
16903 };
16904 match target {
16905 DialectType::Snowflake => {
16906 let unit =
16907 Expression::Identifier(Identifier::new(&unit_str));
16908 // Cast string literal to TIMESTAMP, but not for Snowflake source
16909 // (Snowflake natively accepts string literals in DATEADD)
16910 let arg2 = if matches!(
16911 &arg2,
16912 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16913 ) && !matches!(source, DialectType::Snowflake)
16914 {
16915 Expression::Cast(Box::new(Cast {
16916 this: arg2,
16917 to: DataType::Timestamp {
16918 precision: None,
16919 timezone: false,
16920 },
16921 trailing_comments: Vec::new(),
16922 double_colon_syntax: false,
16923 format: None,
16924 default: None,
16925 inferred_type: None,
16926 }))
16927 } else {
16928 arg2
16929 };
16930 Ok(Expression::Function(Box::new(Function::new(
16931 "DATEADD".to_string(),
16932 vec![unit, arg1, arg2],
16933 ))))
16934 }
16935 DialectType::TSQL => {
16936 let unit =
16937 Expression::Identifier(Identifier::new(&unit_str));
16938 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
16939 let arg2 = if matches!(
16940 &arg2,
16941 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16942 ) && !matches!(
16943 source,
16944 DialectType::Spark
16945 | DialectType::Databricks
16946 | DialectType::Hive
16947 ) {
16948 Expression::Cast(Box::new(Cast {
16949 this: arg2,
16950 to: DataType::Custom {
16951 name: "DATETIME2".to_string(),
16952 },
16953 trailing_comments: Vec::new(),
16954 double_colon_syntax: false,
16955 format: None,
16956 default: None,
16957 inferred_type: None,
16958 }))
16959 } else {
16960 arg2
16961 };
16962 Ok(Expression::Function(Box::new(Function::new(
16963 "DATEADD".to_string(),
16964 vec![unit, arg1, arg2],
16965 ))))
16966 }
16967 DialectType::Redshift => {
16968 let unit =
16969 Expression::Identifier(Identifier::new(&unit_str));
16970 Ok(Expression::Function(Box::new(Function::new(
16971 "DATEADD".to_string(),
16972 vec![unit, arg1, arg2],
16973 ))))
16974 }
16975 DialectType::Databricks => {
16976 let unit =
16977 Expression::Identifier(Identifier::new(&unit_str));
16978 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
16979 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
16980 let func_name = if matches!(
16981 source,
16982 DialectType::TSQL
16983 | DialectType::Fabric
16984 | DialectType::Databricks
16985 | DialectType::Snowflake
16986 ) {
16987 "DATEADD"
16988 } else {
16989 "DATE_ADD"
16990 };
16991 Ok(Expression::Function(Box::new(Function::new(
16992 func_name.to_string(),
16993 vec![unit, arg1, arg2],
16994 ))))
16995 }
16996 DialectType::DuckDB => {
16997 // Special handling for NANOSECOND from Snowflake
16998 if unit_str == "NANOSECOND"
16999 && matches!(source, DialectType::Snowflake)
17000 {
17001 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
17002 let cast_ts = Expression::Cast(Box::new(Cast {
17003 this: arg2,
17004 to: DataType::Custom {
17005 name: "TIMESTAMP_NS".to_string(),
17006 },
17007 trailing_comments: vec![],
17008 double_colon_syntax: false,
17009 format: None,
17010 default: None,
17011 inferred_type: None,
17012 }));
17013 let epoch_ns =
17014 Expression::Function(Box::new(Function::new(
17015 "EPOCH_NS".to_string(),
17016 vec![cast_ts],
17017 )));
17018 let sum = Expression::Add(Box::new(BinaryOp::new(
17019 epoch_ns, arg1,
17020 )));
17021 Ok(Expression::Function(Box::new(Function::new(
17022 "MAKE_TIMESTAMP_NS".to_string(),
17023 vec![sum],
17024 ))))
17025 } else {
17026 // DuckDB: convert to date + INTERVAL syntax with CAST
17027 let iu = Self::parse_interval_unit_static(&unit_str);
17028 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
17029 this: Some(arg1),
17030 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
17031 }));
17032 // Cast string literal to TIMESTAMP
17033 let arg2 = if matches!(
17034 &arg2,
17035 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17036 ) {
17037 Expression::Cast(Box::new(Cast {
17038 this: arg2,
17039 to: DataType::Timestamp {
17040 precision: None,
17041 timezone: false,
17042 },
17043 trailing_comments: Vec::new(),
17044 double_colon_syntax: false,
17045 format: None,
17046 default: None,
17047 inferred_type: None,
17048 }))
17049 } else {
17050 arg2
17051 };
17052 Ok(Expression::Add(Box::new(
17053 crate::expressions::BinaryOp::new(arg2, interval),
17054 )))
17055 }
17056 }
17057 DialectType::Spark => {
17058 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
17059 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
17060 if matches!(source, DialectType::TSQL | DialectType::Fabric)
17061 {
17062 fn multiply_expr_spark(
17063 expr: Expression,
17064 factor: i64,
17065 ) -> Expression
17066 {
17067 if let Expression::Literal(lit) = &expr {
17068 if let crate::expressions::Literal::Number(n) =
17069 lit.as_ref()
17070 {
17071 if let Ok(val) = n.parse::<i64>() {
17072 return Expression::Literal(Box::new(
17073 crate::expressions::Literal::Number(
17074 (val * factor).to_string(),
17075 ),
17076 ));
17077 }
17078 }
17079 }
17080 Expression::Mul(Box::new(
17081 crate::expressions::BinaryOp::new(
17082 expr,
17083 Expression::Literal(Box::new(
17084 crate::expressions::Literal::Number(
17085 factor.to_string(),
17086 ),
17087 )),
17088 ),
17089 ))
17090 }
17091 let normalized_unit = match unit_str.as_str() {
17092 "YEAR" | "YY" | "YYYY" => "YEAR",
17093 "QUARTER" | "QQ" | "Q" => "QUARTER",
17094 "MONTH" | "MM" | "M" => "MONTH",
17095 "WEEK" | "WK" | "WW" => "WEEK",
17096 "DAY" | "DD" | "D" | "DY" => "DAY",
17097 _ => &unit_str,
17098 };
17099 match normalized_unit {
17100 "YEAR" => {
17101 let months = multiply_expr_spark(arg1, 12);
17102 Ok(Expression::Function(Box::new(
17103 Function::new(
17104 "ADD_MONTHS".to_string(),
17105 vec![arg2, months],
17106 ),
17107 )))
17108 }
17109 "QUARTER" => {
17110 let months = multiply_expr_spark(arg1, 3);
17111 Ok(Expression::Function(Box::new(
17112 Function::new(
17113 "ADD_MONTHS".to_string(),
17114 vec![arg2, months],
17115 ),
17116 )))
17117 }
17118 "MONTH" => Ok(Expression::Function(Box::new(
17119 Function::new(
17120 "ADD_MONTHS".to_string(),
17121 vec![arg2, arg1],
17122 ),
17123 ))),
17124 "WEEK" => {
17125 let days = multiply_expr_spark(arg1, 7);
17126 Ok(Expression::Function(Box::new(
17127 Function::new(
17128 "DATE_ADD".to_string(),
17129 vec![arg2, days],
17130 ),
17131 )))
17132 }
17133 "DAY" => Ok(Expression::Function(Box::new(
17134 Function::new(
17135 "DATE_ADD".to_string(),
17136 vec![arg2, arg1],
17137 ),
17138 ))),
17139 _ => {
17140 let unit = Expression::Identifier(
17141 Identifier::new(&unit_str),
17142 );
17143 Ok(Expression::Function(Box::new(
17144 Function::new(
17145 "DATE_ADD".to_string(),
17146 vec![unit, arg1, arg2],
17147 ),
17148 )))
17149 }
17150 }
17151 } else {
17152 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
17153 let unit =
17154 Expression::Identifier(Identifier::new(&unit_str));
17155 Ok(Expression::Function(Box::new(Function::new(
17156 "DATE_ADD".to_string(),
17157 vec![unit, arg1, arg2],
17158 ))))
17159 }
17160 }
17161 DialectType::Hive => match unit_str.as_str() {
17162 "MONTH" => {
17163 Ok(Expression::Function(Box::new(Function::new(
17164 "ADD_MONTHS".to_string(),
17165 vec![arg2, arg1],
17166 ))))
17167 }
17168 _ => Ok(Expression::Function(Box::new(Function::new(
17169 "DATE_ADD".to_string(),
17170 vec![arg2, arg1],
17171 )))),
17172 },
17173 DialectType::Presto
17174 | DialectType::Trino
17175 | DialectType::Athena => {
17176 // Cast string literal date to TIMESTAMP
17177 let arg2 = if matches!(
17178 &arg2,
17179 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17180 ) {
17181 Expression::Cast(Box::new(Cast {
17182 this: arg2,
17183 to: DataType::Timestamp {
17184 precision: None,
17185 timezone: false,
17186 },
17187 trailing_comments: Vec::new(),
17188 double_colon_syntax: false,
17189 format: None,
17190 default: None,
17191 inferred_type: None,
17192 }))
17193 } else {
17194 arg2
17195 };
17196 Ok(Expression::Function(Box::new(Function::new(
17197 "DATE_ADD".to_string(),
17198 vec![Expression::string(&unit_str), arg1, arg2],
17199 ))))
17200 }
17201 DialectType::MySQL => {
17202 let iu = Self::parse_interval_unit_static(&unit_str);
17203 Ok(Expression::DateAdd(Box::new(
17204 crate::expressions::DateAddFunc {
17205 this: arg2,
17206 interval: arg1,
17207 unit: iu,
17208 },
17209 )))
17210 }
17211 DialectType::PostgreSQL => {
17212 // Cast string literal date to TIMESTAMP
17213 let arg2 = if matches!(
17214 &arg2,
17215 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17216 ) {
17217 Expression::Cast(Box::new(Cast {
17218 this: arg2,
17219 to: DataType::Timestamp {
17220 precision: None,
17221 timezone: false,
17222 },
17223 trailing_comments: Vec::new(),
17224 double_colon_syntax: false,
17225 format: None,
17226 default: None,
17227 inferred_type: None,
17228 }))
17229 } else {
17230 arg2
17231 };
17232 let interval = Expression::Interval(Box::new(
17233 crate::expressions::Interval {
17234 this: Some(Expression::string(&format!(
17235 "{} {}",
17236 Self::expr_to_string_static(&arg1),
17237 unit_str
17238 ))),
17239 unit: None,
17240 },
17241 ));
17242 Ok(Expression::Add(Box::new(
17243 crate::expressions::BinaryOp::new(arg2, interval),
17244 )))
17245 }
17246 DialectType::BigQuery => {
17247 let iu = Self::parse_interval_unit_static(&unit_str);
17248 let interval = Expression::Interval(Box::new(
17249 crate::expressions::Interval {
17250 this: Some(arg1),
17251 unit: Some(
17252 crate::expressions::IntervalUnitSpec::Simple {
17253 unit: iu,
17254 use_plural: false,
17255 },
17256 ),
17257 },
17258 ));
17259 // Non-TSQL sources: CAST string literal to DATETIME
17260 let arg2 = if !matches!(
17261 source,
17262 DialectType::TSQL | DialectType::Fabric
17263 ) && matches!(
17264 &arg2,
17265 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17266 ) {
17267 Expression::Cast(Box::new(Cast {
17268 this: arg2,
17269 to: DataType::Custom {
17270 name: "DATETIME".to_string(),
17271 },
17272 trailing_comments: Vec::new(),
17273 double_colon_syntax: false,
17274 format: None,
17275 default: None,
17276 inferred_type: None,
17277 }))
17278 } else {
17279 arg2
17280 };
17281 Ok(Expression::Function(Box::new(Function::new(
17282 "DATE_ADD".to_string(),
17283 vec![arg2, interval],
17284 ))))
17285 }
17286 _ => {
17287 let unit =
17288 Expression::Identifier(Identifier::new(&unit_str));
17289 Ok(Expression::Function(Box::new(Function::new(
17290 "DATEADD".to_string(),
17291 vec![unit, arg1, arg2],
17292 ))))
17293 }
17294 }
17295 }
17296 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
17297 // or (date, val, 'UNIT') from Generic canonical form
17298 "DATE_ADD" if f.args.len() == 3 => {
17299 let mut args = f.args;
17300 let arg0 = args.remove(0);
17301 let arg1 = args.remove(0);
17302 let arg2 = args.remove(0);
17303 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
17304 // where arg2 is a string literal matching a unit name
17305 let arg2_unit = match &arg2 {
17306 Expression::Literal(lit)
17307 if matches!(lit.as_ref(), Literal::String(_)) =>
17308 {
17309 let Literal::String(s) = lit.as_ref() else {
17310 unreachable!()
17311 };
17312 let u = s.to_ascii_uppercase();
17313 if matches!(
17314 u.as_str(),
17315 "DAY"
17316 | "MONTH"
17317 | "YEAR"
17318 | "HOUR"
17319 | "MINUTE"
17320 | "SECOND"
17321 | "WEEK"
17322 | "QUARTER"
17323 | "MILLISECOND"
17324 | "MICROSECOND"
17325 ) {
17326 Some(u)
17327 } else {
17328 None
17329 }
17330 }
17331 _ => None,
17332 };
17333 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
17334 let (unit_str, val, date) = if let Some(u) = arg2_unit {
17335 (u, arg1, arg0)
17336 } else {
17337 (Self::get_unit_str_static(&arg0), arg1, arg2)
17338 };
17339 // Alias for backward compat with the rest of the match
17340 let arg1 = val;
17341 let arg2 = date;
17342
17343 match target {
17344 DialectType::Presto
17345 | DialectType::Trino
17346 | DialectType::Athena => {
17347 Ok(Expression::Function(Box::new(Function::new(
17348 "DATE_ADD".to_string(),
17349 vec![Expression::string(&unit_str), arg1, arg2],
17350 ))))
17351 }
17352 DialectType::DuckDB => {
17353 let iu = Self::parse_interval_unit_static(&unit_str);
17354 let interval = Expression::Interval(Box::new(
17355 crate::expressions::Interval {
17356 this: Some(arg1),
17357 unit: Some(
17358 crate::expressions::IntervalUnitSpec::Simple {
17359 unit: iu,
17360 use_plural: false,
17361 },
17362 ),
17363 },
17364 ));
17365 Ok(Expression::Add(Box::new(
17366 crate::expressions::BinaryOp::new(arg2, interval),
17367 )))
17368 }
17369 DialectType::PostgreSQL
17370 | DialectType::Materialize
17371 | DialectType::RisingWave => {
17372 // PostgreSQL: x + INTERVAL '1 DAY'
17373 let amount_str = Self::expr_to_string_static(&arg1);
17374 let interval = Expression::Interval(Box::new(
17375 crate::expressions::Interval {
17376 this: Some(Expression::string(&format!(
17377 "{} {}",
17378 amount_str, unit_str
17379 ))),
17380 unit: None,
17381 },
17382 ));
17383 Ok(Expression::Add(Box::new(
17384 crate::expressions::BinaryOp::new(arg2, interval),
17385 )))
17386 }
17387 DialectType::Snowflake
17388 | DialectType::TSQL
17389 | DialectType::Redshift => {
17390 let unit =
17391 Expression::Identifier(Identifier::new(&unit_str));
17392 Ok(Expression::Function(Box::new(Function::new(
17393 "DATEADD".to_string(),
17394 vec![unit, arg1, arg2],
17395 ))))
17396 }
17397 DialectType::BigQuery
17398 | DialectType::MySQL
17399 | DialectType::Doris
17400 | DialectType::StarRocks
17401 | DialectType::Drill => {
17402 // DATE_ADD(date, INTERVAL amount UNIT)
17403 let iu = Self::parse_interval_unit_static(&unit_str);
17404 let interval = Expression::Interval(Box::new(
17405 crate::expressions::Interval {
17406 this: Some(arg1),
17407 unit: Some(
17408 crate::expressions::IntervalUnitSpec::Simple {
17409 unit: iu,
17410 use_plural: false,
17411 },
17412 ),
17413 },
17414 ));
17415 Ok(Expression::Function(Box::new(Function::new(
17416 "DATE_ADD".to_string(),
17417 vec![arg2, interval],
17418 ))))
17419 }
17420 DialectType::SQLite => {
17421 // SQLite: DATE(x, '1 DAY')
17422 // Build the string '1 DAY' from amount and unit
17423 let amount_str = match &arg1 {
17424 Expression::Literal(lit)
17425 if matches!(lit.as_ref(), Literal::Number(_)) =>
17426 {
17427 let Literal::Number(n) = lit.as_ref() else {
17428 unreachable!()
17429 };
17430 n.clone()
17431 }
17432 _ => "1".to_string(),
17433 };
17434 Ok(Expression::Function(Box::new(Function::new(
17435 "DATE".to_string(),
17436 vec![
17437 arg2,
17438 Expression::string(format!(
17439 "{} {}",
17440 amount_str, unit_str
17441 )),
17442 ],
17443 ))))
17444 }
17445 DialectType::Dremio => {
17446 // Dremio: DATE_ADD(date, amount) - drops unit
17447 Ok(Expression::Function(Box::new(Function::new(
17448 "DATE_ADD".to_string(),
17449 vec![arg2, arg1],
17450 ))))
17451 }
17452 DialectType::Spark => {
17453 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
17454 if unit_str == "DAY" {
17455 Ok(Expression::Function(Box::new(Function::new(
17456 "DATE_ADD".to_string(),
17457 vec![arg2, arg1],
17458 ))))
17459 } else {
17460 let unit =
17461 Expression::Identifier(Identifier::new(&unit_str));
17462 Ok(Expression::Function(Box::new(Function::new(
17463 "DATE_ADD".to_string(),
17464 vec![unit, arg1, arg2],
17465 ))))
17466 }
17467 }
17468 DialectType::Databricks => {
17469 let unit =
17470 Expression::Identifier(Identifier::new(&unit_str));
17471 Ok(Expression::Function(Box::new(Function::new(
17472 "DATE_ADD".to_string(),
17473 vec![unit, arg1, arg2],
17474 ))))
17475 }
17476 DialectType::Hive => {
17477 // Hive: DATE_ADD(date, val) for DAY
17478 Ok(Expression::Function(Box::new(Function::new(
17479 "DATE_ADD".to_string(),
17480 vec![arg2, arg1],
17481 ))))
17482 }
17483 _ => {
17484 let unit =
17485 Expression::Identifier(Identifier::new(&unit_str));
17486 Ok(Expression::Function(Box::new(Function::new(
17487 "DATE_ADD".to_string(),
17488 vec![unit, arg1, arg2],
17489 ))))
17490 }
17491 }
17492 }
17493 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
17494 "DATE_ADD"
17495 if f.args.len() == 2
17496 && matches!(
17497 source,
17498 DialectType::Hive
17499 | DialectType::Spark
17500 | DialectType::Databricks
17501 | DialectType::Generic
17502 ) =>
17503 {
17504 let mut args = f.args;
17505 let date = args.remove(0);
17506 let days = args.remove(0);
17507 match target {
17508 DialectType::Hive | DialectType::Spark => {
17509 // Keep as DATE_ADD(date, days) for Hive/Spark
17510 Ok(Expression::Function(Box::new(Function::new(
17511 "DATE_ADD".to_string(),
17512 vec![date, days],
17513 ))))
17514 }
17515 DialectType::Databricks => {
17516 // Databricks: DATEADD(DAY, days, date)
17517 Ok(Expression::Function(Box::new(Function::new(
17518 "DATEADD".to_string(),
17519 vec![
17520 Expression::Identifier(Identifier::new("DAY")),
17521 days,
17522 date,
17523 ],
17524 ))))
17525 }
17526 DialectType::DuckDB => {
17527 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
17528 let cast_date = Self::ensure_cast_date(date);
17529 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
17530 let interval_val = if matches!(
17531 days,
17532 Expression::Mul(_)
17533 | Expression::Sub(_)
17534 | Expression::Add(_)
17535 ) {
17536 Expression::Paren(Box::new(crate::expressions::Paren {
17537 this: days,
17538 trailing_comments: vec![],
17539 }))
17540 } else {
17541 days
17542 };
17543 let interval = Expression::Interval(Box::new(
17544 crate::expressions::Interval {
17545 this: Some(interval_val),
17546 unit: Some(
17547 crate::expressions::IntervalUnitSpec::Simple {
17548 unit: crate::expressions::IntervalUnit::Day,
17549 use_plural: false,
17550 },
17551 ),
17552 },
17553 ));
17554 Ok(Expression::Add(Box::new(
17555 crate::expressions::BinaryOp::new(cast_date, interval),
17556 )))
17557 }
17558 DialectType::Snowflake => {
17559 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
17560 let cast_date = if matches!(
17561 source,
17562 DialectType::Hive
17563 | DialectType::Spark
17564 | DialectType::Databricks
17565 ) {
17566 if matches!(
17567 date,
17568 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17569 ) {
17570 Self::double_cast_timestamp_date(date)
17571 } else {
17572 date
17573 }
17574 } else {
17575 date
17576 };
17577 Ok(Expression::Function(Box::new(Function::new(
17578 "DATEADD".to_string(),
17579 vec![
17580 Expression::Identifier(Identifier::new("DAY")),
17581 days,
17582 cast_date,
17583 ],
17584 ))))
17585 }
17586 DialectType::Redshift => {
17587 Ok(Expression::Function(Box::new(Function::new(
17588 "DATEADD".to_string(),
17589 vec![
17590 Expression::Identifier(Identifier::new("DAY")),
17591 days,
17592 date,
17593 ],
17594 ))))
17595 }
17596 DialectType::TSQL | DialectType::Fabric => {
17597 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
17598 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
17599 let cast_date = if matches!(
17600 source,
17601 DialectType::Hive | DialectType::Spark
17602 ) {
17603 if matches!(
17604 date,
17605 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17606 ) {
17607 Self::double_cast_datetime2_date(date)
17608 } else {
17609 date
17610 }
17611 } else {
17612 date
17613 };
17614 Ok(Expression::Function(Box::new(Function::new(
17615 "DATEADD".to_string(),
17616 vec![
17617 Expression::Identifier(Identifier::new("DAY")),
17618 days,
17619 cast_date,
17620 ],
17621 ))))
17622 }
17623 DialectType::Presto
17624 | DialectType::Trino
17625 | DialectType::Athena => {
17626 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
17627 let cast_date = if matches!(
17628 source,
17629 DialectType::Hive
17630 | DialectType::Spark
17631 | DialectType::Databricks
17632 ) {
17633 if matches!(
17634 date,
17635 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17636 ) {
17637 Self::double_cast_timestamp_date(date)
17638 } else {
17639 date
17640 }
17641 } else {
17642 date
17643 };
17644 Ok(Expression::Function(Box::new(Function::new(
17645 "DATE_ADD".to_string(),
17646 vec![Expression::string("DAY"), days, cast_date],
17647 ))))
17648 }
17649 DialectType::BigQuery => {
17650 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
17651 let cast_date = if matches!(
17652 source,
17653 DialectType::Hive
17654 | DialectType::Spark
17655 | DialectType::Databricks
17656 ) {
17657 Self::double_cast_datetime_date(date)
17658 } else {
17659 date
17660 };
17661 // Wrap complex expressions in Paren for interval
17662 let interval_val = if matches!(
17663 days,
17664 Expression::Mul(_)
17665 | Expression::Sub(_)
17666 | Expression::Add(_)
17667 ) {
17668 Expression::Paren(Box::new(crate::expressions::Paren {
17669 this: days,
17670 trailing_comments: vec![],
17671 }))
17672 } else {
17673 days
17674 };
17675 let interval = Expression::Interval(Box::new(
17676 crate::expressions::Interval {
17677 this: Some(interval_val),
17678 unit: Some(
17679 crate::expressions::IntervalUnitSpec::Simple {
17680 unit: crate::expressions::IntervalUnit::Day,
17681 use_plural: false,
17682 },
17683 ),
17684 },
17685 ));
17686 Ok(Expression::Function(Box::new(Function::new(
17687 "DATE_ADD".to_string(),
17688 vec![cast_date, interval],
17689 ))))
17690 }
17691 DialectType::MySQL => {
17692 let iu = crate::expressions::IntervalUnit::Day;
17693 Ok(Expression::DateAdd(Box::new(
17694 crate::expressions::DateAddFunc {
17695 this: date,
17696 interval: days,
17697 unit: iu,
17698 },
17699 )))
17700 }
17701 DialectType::PostgreSQL => {
17702 let interval = Expression::Interval(Box::new(
17703 crate::expressions::Interval {
17704 this: Some(Expression::string(&format!(
17705 "{} DAY",
17706 Self::expr_to_string_static(&days)
17707 ))),
17708 unit: None,
17709 },
17710 ));
17711 Ok(Expression::Add(Box::new(
17712 crate::expressions::BinaryOp::new(date, interval),
17713 )))
17714 }
17715 DialectType::Doris
17716 | DialectType::StarRocks
17717 | DialectType::Drill => {
17718 // DATE_ADD(date, INTERVAL days DAY)
17719 let interval = Expression::Interval(Box::new(
17720 crate::expressions::Interval {
17721 this: Some(days),
17722 unit: Some(
17723 crate::expressions::IntervalUnitSpec::Simple {
17724 unit: crate::expressions::IntervalUnit::Day,
17725 use_plural: false,
17726 },
17727 ),
17728 },
17729 ));
17730 Ok(Expression::Function(Box::new(Function::new(
17731 "DATE_ADD".to_string(),
17732 vec![date, interval],
17733 ))))
17734 }
17735 _ => Ok(Expression::Function(Box::new(Function::new(
17736 "DATE_ADD".to_string(),
17737 vec![date, days],
17738 )))),
17739 }
17740 }
17741 // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg
17742 "DATE_ADD"
17743 if f.args.len() == 2
17744 && matches!(
17745 source,
17746 DialectType::MySQL | DialectType::SingleStore
17747 )
17748 && matches!(&f.args[1], Expression::Interval(_)) =>
17749 {
17750 let mut args = f.args;
17751 let date = args.remove(0);
17752 let interval_expr = args.remove(0);
17753 let (val, unit) = Self::extract_interval_parts(&interval_expr)
17754 .unwrap_or_else(|| {
17755 (
17756 interval_expr.clone(),
17757 crate::expressions::IntervalUnit::Day,
17758 )
17759 });
17760 let unit_str = Self::interval_unit_to_string(&unit);
17761 let is_literal = matches!(&val,
17762 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
17763 );
17764
17765 match target {
17766 DialectType::MySQL | DialectType::SingleStore => {
17767 // Keep as DATE_ADD(date, INTERVAL val UNIT)
17768 Ok(Expression::Function(Box::new(Function::new(
17769 "DATE_ADD".to_string(),
17770 vec![date, interval_expr],
17771 ))))
17772 }
17773 DialectType::PostgreSQL => {
17774 if is_literal {
17775 // Literal: date + INTERVAL 'val UNIT'
17776 let interval = Expression::Interval(Box::new(
17777 crate::expressions::Interval {
17778 this: Some(Expression::Literal(Box::new(
17779 Literal::String(format!(
17780 "{} {}",
17781 Self::expr_to_string(&val),
17782 unit_str
17783 )),
17784 ))),
17785 unit: None,
17786 },
17787 ));
17788 Ok(Expression::Add(Box::new(
17789 crate::expressions::BinaryOp::new(date, interval),
17790 )))
17791 } else {
17792 // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
17793 let interval_one = Expression::Interval(Box::new(
17794 crate::expressions::Interval {
17795 this: Some(Expression::Literal(Box::new(
17796 Literal::String(format!("1 {}", unit_str)),
17797 ))),
17798 unit: None,
17799 },
17800 ));
17801 let mul = Expression::Mul(Box::new(
17802 crate::expressions::BinaryOp::new(
17803 interval_one,
17804 val,
17805 ),
17806 ));
17807 Ok(Expression::Add(Box::new(
17808 crate::expressions::BinaryOp::new(date, mul),
17809 )))
17810 }
17811 }
17812 _ => {
17813 // Default: keep as DATE_ADD(date, interval)
17814 Ok(Expression::Function(Box::new(Function::new(
17815 "DATE_ADD".to_string(),
17816 vec![date, interval_expr],
17817 ))))
17818 }
17819 }
17820 }
17821 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
17822 "DATE_SUB"
17823 if f.args.len() == 2
17824 && matches!(
17825 source,
17826 DialectType::Hive
17827 | DialectType::Spark
17828 | DialectType::Databricks
17829 ) =>
17830 {
17831 let mut args = f.args;
17832 let date = args.remove(0);
17833 let days = args.remove(0);
17834 // Helper to create days * -1
17835 let make_neg_days = |d: Expression| -> Expression {
17836 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
17837 d,
17838 Expression::Literal(Box::new(Literal::Number(
17839 "-1".to_string(),
17840 ))),
17841 )))
17842 };
17843 let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
17844 match target {
17845 DialectType::Hive
17846 | DialectType::Spark
17847 | DialectType::Databricks => {
17848 // Keep as DATE_SUB(date, days) for Hive/Spark
17849 Ok(Expression::Function(Box::new(Function::new(
17850 "DATE_SUB".to_string(),
17851 vec![date, days],
17852 ))))
17853 }
17854 DialectType::DuckDB => {
17855 let cast_date = Self::ensure_cast_date(date);
17856 let neg = make_neg_days(days);
17857 let interval = Expression::Interval(Box::new(
17858 crate::expressions::Interval {
17859 this: Some(Expression::Paren(Box::new(
17860 crate::expressions::Paren {
17861 this: neg,
17862 trailing_comments: vec![],
17863 },
17864 ))),
17865 unit: Some(
17866 crate::expressions::IntervalUnitSpec::Simple {
17867 unit: crate::expressions::IntervalUnit::Day,
17868 use_plural: false,
17869 },
17870 ),
17871 },
17872 ));
17873 Ok(Expression::Add(Box::new(
17874 crate::expressions::BinaryOp::new(cast_date, interval),
17875 )))
17876 }
17877 DialectType::Snowflake => {
17878 let cast_date = if is_string_literal {
17879 Self::double_cast_timestamp_date(date)
17880 } else {
17881 date
17882 };
17883 let neg = make_neg_days(days);
17884 Ok(Expression::Function(Box::new(Function::new(
17885 "DATEADD".to_string(),
17886 vec![
17887 Expression::Identifier(Identifier::new("DAY")),
17888 neg,
17889 cast_date,
17890 ],
17891 ))))
17892 }
17893 DialectType::Redshift => {
17894 let neg = make_neg_days(days);
17895 Ok(Expression::Function(Box::new(Function::new(
17896 "DATEADD".to_string(),
17897 vec![
17898 Expression::Identifier(Identifier::new("DAY")),
17899 neg,
17900 date,
17901 ],
17902 ))))
17903 }
17904 DialectType::TSQL | DialectType::Fabric => {
17905 let cast_date = if is_string_literal {
17906 Self::double_cast_datetime2_date(date)
17907 } else {
17908 date
17909 };
17910 let neg = make_neg_days(days);
17911 Ok(Expression::Function(Box::new(Function::new(
17912 "DATEADD".to_string(),
17913 vec![
17914 Expression::Identifier(Identifier::new("DAY")),
17915 neg,
17916 cast_date,
17917 ],
17918 ))))
17919 }
17920 DialectType::Presto
17921 | DialectType::Trino
17922 | DialectType::Athena => {
17923 let cast_date = if is_string_literal {
17924 Self::double_cast_timestamp_date(date)
17925 } else {
17926 date
17927 };
17928 let neg = make_neg_days(days);
17929 Ok(Expression::Function(Box::new(Function::new(
17930 "DATE_ADD".to_string(),
17931 vec![Expression::string("DAY"), neg, cast_date],
17932 ))))
17933 }
17934 DialectType::BigQuery => {
17935 let cast_date = if is_string_literal {
17936 Self::double_cast_datetime_date(date)
17937 } else {
17938 date
17939 };
17940 let neg = make_neg_days(days);
17941 let interval = Expression::Interval(Box::new(
17942 crate::expressions::Interval {
17943 this: Some(Expression::Paren(Box::new(
17944 crate::expressions::Paren {
17945 this: neg,
17946 trailing_comments: vec![],
17947 },
17948 ))),
17949 unit: Some(
17950 crate::expressions::IntervalUnitSpec::Simple {
17951 unit: crate::expressions::IntervalUnit::Day,
17952 use_plural: false,
17953 },
17954 ),
17955 },
17956 ));
17957 Ok(Expression::Function(Box::new(Function::new(
17958 "DATE_ADD".to_string(),
17959 vec![cast_date, interval],
17960 ))))
17961 }
17962 _ => Ok(Expression::Function(Box::new(Function::new(
17963 "DATE_SUB".to_string(),
17964 vec![date, days],
17965 )))),
17966 }
17967 }
17968 // ADD_MONTHS(date, val) -> target-specific
17969 "ADD_MONTHS" if f.args.len() == 2 => {
17970 let mut args = f.args;
17971 let date = args.remove(0);
17972 let val = args.remove(0);
17973 match target {
17974 DialectType::TSQL => {
17975 let cast_date = Self::ensure_cast_datetime2(date);
17976 Ok(Expression::Function(Box::new(Function::new(
17977 "DATEADD".to_string(),
17978 vec![
17979 Expression::Identifier(Identifier::new("MONTH")),
17980 val,
17981 cast_date,
17982 ],
17983 ))))
17984 }
17985 DialectType::DuckDB => {
17986 let interval = Expression::Interval(Box::new(
17987 crate::expressions::Interval {
17988 this: Some(val),
17989 unit: Some(
17990 crate::expressions::IntervalUnitSpec::Simple {
17991 unit:
17992 crate::expressions::IntervalUnit::Month,
17993 use_plural: false,
17994 },
17995 ),
17996 },
17997 ));
17998 Ok(Expression::Add(Box::new(
17999 crate::expressions::BinaryOp::new(date, interval),
18000 )))
18001 }
18002 DialectType::Snowflake => {
18003 // Keep ADD_MONTHS when source is Snowflake
18004 if matches!(source, DialectType::Snowflake) {
18005 Ok(Expression::Function(Box::new(Function::new(
18006 "ADD_MONTHS".to_string(),
18007 vec![date, val],
18008 ))))
18009 } else {
18010 Ok(Expression::Function(Box::new(Function::new(
18011 "DATEADD".to_string(),
18012 vec![
18013 Expression::Identifier(Identifier::new(
18014 "MONTH",
18015 )),
18016 val,
18017 date,
18018 ],
18019 ))))
18020 }
18021 }
18022 DialectType::Redshift => {
18023 Ok(Expression::Function(Box::new(Function::new(
18024 "DATEADD".to_string(),
18025 vec![
18026 Expression::Identifier(Identifier::new("MONTH")),
18027 val,
18028 date,
18029 ],
18030 ))))
18031 }
18032 DialectType::Presto
18033 | DialectType::Trino
18034 | DialectType::Athena => {
18035 Ok(Expression::Function(Box::new(Function::new(
18036 "DATE_ADD".to_string(),
18037 vec![Expression::string("MONTH"), val, date],
18038 ))))
18039 }
18040 DialectType::BigQuery => {
18041 let interval = Expression::Interval(Box::new(
18042 crate::expressions::Interval {
18043 this: Some(val),
18044 unit: Some(
18045 crate::expressions::IntervalUnitSpec::Simple {
18046 unit:
18047 crate::expressions::IntervalUnit::Month,
18048 use_plural: false,
18049 },
18050 ),
18051 },
18052 ));
18053 Ok(Expression::Function(Box::new(Function::new(
18054 "DATE_ADD".to_string(),
18055 vec![date, interval],
18056 ))))
18057 }
18058 _ => Ok(Expression::Function(Box::new(Function::new(
18059 "ADD_MONTHS".to_string(),
18060 vec![date, val],
18061 )))),
18062 }
18063 }
18064 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
18065 "DATETRUNC" if f.args.len() == 2 => {
18066 let mut args = f.args;
18067 let arg0 = args.remove(0);
18068 let arg1 = args.remove(0);
18069 let unit_str = Self::get_unit_str_static(&arg0);
18070 match target {
18071 DialectType::TSQL | DialectType::Fabric => {
18072 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
18073 Ok(Expression::Function(Box::new(Function::new(
18074 "DATETRUNC".to_string(),
18075 vec![
18076 Expression::Identifier(Identifier::new(&unit_str)),
18077 arg1,
18078 ],
18079 ))))
18080 }
18081 DialectType::DuckDB => {
18082 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
18083 let date = Self::ensure_cast_timestamp(arg1);
18084 Ok(Expression::Function(Box::new(Function::new(
18085 "DATE_TRUNC".to_string(),
18086 vec![Expression::string(&unit_str), date],
18087 ))))
18088 }
18089 DialectType::ClickHouse => {
18090 // ClickHouse: dateTrunc('UNIT', expr)
18091 Ok(Expression::Function(Box::new(Function::new(
18092 "dateTrunc".to_string(),
18093 vec![Expression::string(&unit_str), arg1],
18094 ))))
18095 }
18096 _ => {
18097 // Standard: DATE_TRUNC('UNIT', expr)
18098 let unit = Expression::string(&unit_str);
18099 Ok(Expression::Function(Box::new(Function::new(
18100 "DATE_TRUNC".to_string(),
18101 vec![unit, arg1],
18102 ))))
18103 }
18104 }
18105 }
18106 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
18107 "GETDATE" if f.args.is_empty() => match target {
18108 DialectType::TSQL => Ok(Expression::Function(f)),
18109 DialectType::Redshift => Ok(Expression::Function(Box::new(
18110 Function::new("GETDATE".to_string(), vec![]),
18111 ))),
18112 _ => Ok(Expression::CurrentTimestamp(
18113 crate::expressions::CurrentTimestamp {
18114 precision: None,
18115 sysdate: false,
18116 },
18117 )),
18118 },
18119 // TO_HEX(x) / HEX(x) -> target-specific hex function
18120 "TO_HEX" | "HEX" if f.args.len() == 1 => {
18121 let name = match target {
18122 DialectType::Presto | DialectType::Trino => "TO_HEX",
18123 DialectType::Spark
18124 | DialectType::Databricks
18125 | DialectType::Hive => "HEX",
18126 DialectType::DuckDB
18127 | DialectType::PostgreSQL
18128 | DialectType::Redshift => "TO_HEX",
18129 _ => &f.name,
18130 };
18131 Ok(Expression::Function(Box::new(Function::new(
18132 name.to_string(),
18133 f.args,
18134 ))))
18135 }
18136 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
18137 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
18138 match target {
18139 DialectType::BigQuery => {
18140 // BigQuery: UNHEX(x) -> FROM_HEX(x)
18141 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
18142 // because BigQuery MD5 returns BYTES, not hex string
18143 let arg = &f.args[0];
18144 let wrapped_arg = match arg {
18145 Expression::Function(inner_f)
18146 if inner_f.name.eq_ignore_ascii_case("MD5")
18147 || inner_f
18148 .name
18149 .eq_ignore_ascii_case("SHA1")
18150 || inner_f
18151 .name
18152 .eq_ignore_ascii_case("SHA256")
18153 || inner_f
18154 .name
18155 .eq_ignore_ascii_case("SHA512") =>
18156 {
18157 // Wrap hash function in TO_HEX for BigQuery
18158 Expression::Function(Box::new(Function::new(
18159 "TO_HEX".to_string(),
18160 vec![arg.clone()],
18161 )))
18162 }
18163 _ => f.args.into_iter().next().unwrap(),
18164 };
18165 Ok(Expression::Function(Box::new(Function::new(
18166 "FROM_HEX".to_string(),
18167 vec![wrapped_arg],
18168 ))))
18169 }
18170 _ => {
18171 let name = match target {
18172 DialectType::Presto | DialectType::Trino => "FROM_HEX",
18173 DialectType::Spark
18174 | DialectType::Databricks
18175 | DialectType::Hive => "UNHEX",
18176 _ => &f.name,
18177 };
18178 Ok(Expression::Function(Box::new(Function::new(
18179 name.to_string(),
18180 f.args,
18181 ))))
18182 }
18183 }
18184 }
18185 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
18186 "TO_UTF8" if f.args.len() == 1 => match target {
18187 DialectType::Spark | DialectType::Databricks => {
18188 let mut args = f.args;
18189 args.push(Expression::string("utf-8"));
18190 Ok(Expression::Function(Box::new(Function::new(
18191 "ENCODE".to_string(),
18192 args,
18193 ))))
18194 }
18195 _ => Ok(Expression::Function(f)),
18196 },
18197 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
18198 "FROM_UTF8" if f.args.len() == 1 => match target {
18199 DialectType::Spark | DialectType::Databricks => {
18200 let mut args = f.args;
18201 args.push(Expression::string("utf-8"));
18202 Ok(Expression::Function(Box::new(Function::new(
18203 "DECODE".to_string(),
18204 args,
18205 ))))
18206 }
18207 _ => Ok(Expression::Function(f)),
18208 },
18209 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
18210 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
18211 let name = match target {
18212 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
18213 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
18214 DialectType::PostgreSQL | DialectType::Redshift => {
18215 "STARTS_WITH"
18216 }
18217 _ => &f.name,
18218 };
18219 Ok(Expression::Function(Box::new(Function::new(
18220 name.to_string(),
18221 f.args,
18222 ))))
18223 }
18224 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
18225 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
18226 let name = match target {
18227 DialectType::Presto
18228 | DialectType::Trino
18229 | DialectType::Athena => "APPROX_DISTINCT",
18230 _ => "APPROX_COUNT_DISTINCT",
18231 };
18232 Ok(Expression::Function(Box::new(Function::new(
18233 name.to_string(),
18234 f.args,
18235 ))))
18236 }
18237 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
18238 "JSON_EXTRACT"
18239 if f.args.len() == 2
18240 && !matches!(source, DialectType::BigQuery)
18241 && matches!(
18242 target,
18243 DialectType::Spark
18244 | DialectType::Databricks
18245 | DialectType::Hive
18246 ) =>
18247 {
18248 Ok(Expression::Function(Box::new(Function::new(
18249 "GET_JSON_OBJECT".to_string(),
18250 f.args,
18251 ))))
18252 }
18253 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
18254 "JSON_EXTRACT"
18255 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
18256 {
18257 let mut args = f.args;
18258 let path = args.remove(1);
18259 let this = args.remove(0);
18260 Ok(Expression::JsonExtract(Box::new(
18261 crate::expressions::JsonExtractFunc {
18262 this,
18263 path,
18264 returning: None,
18265 arrow_syntax: true,
18266 hash_arrow_syntax: false,
18267 wrapper_option: None,
18268 quotes_option: None,
18269 on_scalar_string: false,
18270 on_error: None,
18271 },
18272 )))
18273 }
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
// Any other target keeps the original JSON_FORMAT call untouched.
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            // This path only fires when the source is Presto/Trino/Athena AND the single
            // argument is a ParseJson node wrapping a string literal.
            if matches!(
                source,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
            ) {
                if let Some(Expression::ParseJson(pj)) = f.args.first()
                {
                    if let Expression::Literal(lit) = &pj.this {
                        if let Literal::String(s) = lit.as_ref() {
                            // Wrap the JSON text in [ ] so it is handed to Spark as an array.
                            let wrapped =
                                Expression::Literal(Box::new(
                                    Literal::String(format!("[{}]", s)),
                                ));
                            let schema_of_json = Expression::Function(
                                Box::new(Function::new(
                                    "SCHEMA_OF_JSON".to_string(),
                                    vec![wrapped.clone()],
                                )),
                            );
                            let from_json = Expression::Function(
                                Box::new(Function::new(
                                    "FROM_JSON".to_string(),
                                    vec![wrapped, schema_of_json],
                                )),
                            );
                            let to_json = Expression::Function(
                                Box::new(Function::new(
                                    "TO_JSON".to_string(),
                                    vec![from_json],
                                )),
                            );
                            // The pattern '^.(.*).$' captures everything except the first
                            // and last character, i.e. it drops the brackets added above.
                            // Note: this is an early return out of the enclosing function.
                            return Ok(Expression::Function(Box::new(
                                Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![
                                        to_json,
                                        Expression::Literal(Box::new(
                                            Literal::String(
                                                "^.(.*).$".to_string(),
                                            ),
                                        )),
                                        Expression::Literal(Box::new(
                                            Literal::Number(
                                                "1".to_string(),
                                            ),
                                        )),
                                    ],
                                ),
                            )));
                        }
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present
            // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                // A cast to any other type is left in place as the argument.
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) =
                args.first()
            {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                    && inner_f.args.len() == 1
                {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        // BigQuery: plain rename, arguments unchanged.
        DialectType::BigQuery => Ok(Expression::Function(Box::new(
            Function::new("TO_JSON_STRING".to_string(), f.args),
        ))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(
                Function::new("TO_JSON".to_string(), f.args),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
18378 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
18379 "SYSDATE" if f.args.is_empty() => {
18380 match target {
18381 DialectType::Oracle | DialectType::Redshift => {
18382 Ok(Expression::Function(f))
18383 }
18384 DialectType::Snowflake => {
18385 // Snowflake uses SYSDATE() with parens
18386 let mut f = *f;
18387 f.no_parens = false;
18388 Ok(Expression::Function(Box::new(f)))
18389 }
18390 DialectType::DuckDB => {
18391 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
18392 Ok(Expression::AtTimeZone(Box::new(
18393 crate::expressions::AtTimeZone {
18394 this: Expression::CurrentTimestamp(
18395 crate::expressions::CurrentTimestamp {
18396 precision: None,
18397 sysdate: false,
18398 },
18399 ),
18400 zone: Expression::Literal(Box::new(
18401 Literal::String("UTC".to_string()),
18402 )),
18403 },
18404 )))
18405 }
18406 _ => Ok(Expression::CurrentTimestamp(
18407 crate::expressions::CurrentTimestamp {
18408 precision: None,
18409 sysdate: true,
18410 },
18411 )),
18412 }
18413 }
18414 // LOGICAL_OR(x) -> BOOL_OR(x)
18415 "LOGICAL_OR" if f.args.len() == 1 => {
18416 let name = match target {
18417 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
18418 _ => &f.name,
18419 };
18420 Ok(Expression::Function(Box::new(Function::new(
18421 name.to_string(),
18422 f.args,
18423 ))))
18424 }
18425 // LOGICAL_AND(x) -> BOOL_AND(x)
18426 "LOGICAL_AND" if f.args.len() == 1 => {
18427 let name = match target {
18428 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
18429 _ => &f.name,
18430 };
18431 Ok(Expression::Function(Box::new(Function::new(
18432 name.to_string(),
18433 f.args,
18434 ))))
18435 }
18436 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
18437 "MONTHS_ADD" if f.args.len() == 2 => match target {
18438 DialectType::Oracle => Ok(Expression::Function(Box::new(
18439 Function::new("ADD_MONTHS".to_string(), f.args),
18440 ))),
18441 _ => Ok(Expression::Function(f)),
18442 },
18443 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
18444 "ARRAY_JOIN" if f.args.len() >= 2 => {
18445 match target {
18446 DialectType::Spark | DialectType::Databricks => {
18447 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
18448 Ok(Expression::Function(f))
18449 }
18450 DialectType::Hive => {
18451 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
18452 let mut args = f.args;
18453 let arr = args.remove(0);
18454 let sep = args.remove(0);
18455 // Drop any remaining args (null_replacement)
18456 Ok(Expression::Function(Box::new(Function::new(
18457 "CONCAT_WS".to_string(),
18458 vec![sep, arr],
18459 ))))
18460 }
18461 DialectType::Presto | DialectType::Trino => {
18462 Ok(Expression::Function(f))
18463 }
18464 _ => Ok(Expression::Function(f)),
18465 }
18466 }
18467 // LOCATE(substr, str, pos) 3-arg -> target-specific
18468 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
18469 "LOCATE"
18470 if f.args.len() == 3
18471 && matches!(
18472 target,
18473 DialectType::Presto
18474 | DialectType::Trino
18475 | DialectType::Athena
18476 | DialectType::DuckDB
18477 ) =>
18478 {
18479 let mut args = f.args;
18480 let substr = args.remove(0);
18481 let string = args.remove(0);
18482 let pos = args.remove(0);
18483 // STRPOS(SUBSTRING(string, pos), substr)
18484 let substring_call = Expression::Function(Box::new(Function::new(
18485 "SUBSTRING".to_string(),
18486 vec![string.clone(), pos.clone()],
18487 )));
18488 let strpos_call = Expression::Function(Box::new(Function::new(
18489 "STRPOS".to_string(),
18490 vec![substring_call, substr.clone()],
18491 )));
18492 // STRPOS(...) + pos - 1
18493 let pos_adjusted =
18494 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
18495 Expression::Add(Box::new(
18496 crate::expressions::BinaryOp::new(
18497 strpos_call.clone(),
18498 pos.clone(),
18499 ),
18500 )),
18501 Expression::number(1),
18502 )));
18503 // STRPOS(...) = 0
18504 let is_zero =
18505 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
18506 strpos_call.clone(),
18507 Expression::number(0),
18508 )));
18509
18510 match target {
18511 DialectType::Presto
18512 | DialectType::Trino
18513 | DialectType::Athena => {
18514 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
18515 Ok(Expression::Function(Box::new(Function::new(
18516 "IF".to_string(),
18517 vec![is_zero, Expression::number(0), pos_adjusted],
18518 ))))
18519 }
18520 DialectType::DuckDB => {
18521 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
18522 Ok(Expression::Case(Box::new(crate::expressions::Case {
18523 operand: None,
18524 whens: vec![(is_zero, Expression::number(0))],
18525 else_: Some(pos_adjusted),
18526 comments: Vec::new(),
18527 inferred_type: None,
18528 })))
18529 }
18530 _ => Ok(Expression::Function(Box::new(Function::new(
18531 "LOCATE".to_string(),
18532 vec![substr, string, pos],
18533 )))),
18534 }
18535 }
18536 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
18537 "STRPOS"
18538 if f.args.len() == 3
18539 && matches!(
18540 target,
18541 DialectType::BigQuery
18542 | DialectType::Oracle
18543 | DialectType::Teradata
18544 ) =>
18545 {
18546 let mut args = f.args;
18547 let haystack = args.remove(0);
18548 let needle = args.remove(0);
18549 let occurrence = args.remove(0);
18550 Ok(Expression::Function(Box::new(Function::new(
18551 "INSTR".to_string(),
18552 vec![haystack, needle, Expression::number(1), occurrence],
18553 ))))
18554 }
18555 // SCHEMA_NAME(id) -> target-specific
18556 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
18557 DialectType::MySQL | DialectType::SingleStore => {
18558 Ok(Expression::Function(Box::new(Function::new(
18559 "SCHEMA".to_string(),
18560 vec![],
18561 ))))
18562 }
18563 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
18564 crate::expressions::CurrentSchema { this: None },
18565 ))),
18566 DialectType::SQLite => Ok(Expression::string("main")),
18567 _ => Ok(Expression::Function(f)),
18568 },
18569 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
18570 "STRTOL" if f.args.len() == 2 => match target {
18571 DialectType::Presto | DialectType::Trino => {
18572 Ok(Expression::Function(Box::new(Function::new(
18573 "FROM_BASE".to_string(),
18574 f.args,
18575 ))))
18576 }
18577 _ => Ok(Expression::Function(f)),
18578 },
18579 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
18580 "EDITDIST3" if f.args.len() == 2 => match target {
18581 DialectType::Spark | DialectType::Databricks => {
18582 Ok(Expression::Function(Box::new(Function::new(
18583 "LEVENSHTEIN".to_string(),
18584 f.args,
18585 ))))
18586 }
18587 _ => Ok(Expression::Function(f)),
18588 },
18589 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
18590 "FORMAT"
18591 if f.args.len() == 2
18592 && matches!(
18593 source,
18594 DialectType::MySQL | DialectType::SingleStore
18595 )
18596 && matches!(target, DialectType::DuckDB) =>
18597 {
18598 let mut args = f.args;
18599 let num_expr = args.remove(0);
18600 let decimals_expr = args.remove(0);
18601 // Extract decimal count
18602 let dec_count = match &decimals_expr {
18603 Expression::Literal(lit)
18604 if matches!(lit.as_ref(), Literal::Number(_)) =>
18605 {
18606 let Literal::Number(n) = lit.as_ref() else {
18607 unreachable!()
18608 };
18609 n.clone()
18610 }
18611 _ => "0".to_string(),
18612 };
18613 let fmt_str = format!("{{:,.{}f}}", dec_count);
18614 Ok(Expression::Function(Box::new(Function::new(
18615 "FORMAT".to_string(),
18616 vec![Expression::string(&fmt_str), num_expr],
18617 ))))
18618 }
18619 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
18620 "FORMAT"
18621 if f.args.len() == 2
18622 && matches!(
18623 source,
18624 DialectType::TSQL | DialectType::Fabric
18625 ) =>
18626 {
18627 let val_expr = f.args[0].clone();
18628 let fmt_expr = f.args[1].clone();
18629 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
18630 // Only expand shortcodes that are NOT also valid numeric format specifiers.
18631 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
18632 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
18633 let (expanded_fmt, is_shortcode) = match &fmt_expr {
18634 Expression::Literal(lit)
18635 if matches!(
18636 lit.as_ref(),
18637 crate::expressions::Literal::String(_)
18638 ) =>
18639 {
18640 let crate::expressions::Literal::String(s) = lit.as_ref()
18641 else {
18642 unreachable!()
18643 };
18644 match s.as_str() {
18645 "m" | "M" => (Expression::string("MMMM d"), true),
18646 "t" => (Expression::string("h:mm tt"), true),
18647 "T" => (Expression::string("h:mm:ss tt"), true),
18648 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
18649 _ => (fmt_expr.clone(), false),
18650 }
18651 }
18652 _ => (fmt_expr.clone(), false),
18653 };
18654 // Check if the format looks like a date format
18655 let is_date_format = is_shortcode
18656 || match &expanded_fmt {
18657 Expression::Literal(lit)
18658 if matches!(
18659 lit.as_ref(),
18660 crate::expressions::Literal::String(_)
18661 ) =>
18662 {
18663 let crate::expressions::Literal::String(s) =
18664 lit.as_ref()
18665 else {
18666 unreachable!()
18667 };
18668 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
18669 s.contains("yyyy")
18670 || s.contains("YYYY")
18671 || s.contains("MM")
18672 || s.contains("dd")
18673 || s.contains("MMMM")
18674 || s.contains("HH")
18675 || s.contains("hh")
18676 || s.contains("ss")
18677 }
18678 _ => false,
18679 };
18680 match target {
18681 DialectType::Spark | DialectType::Databricks => {
18682 let func_name = if is_date_format {
18683 "DATE_FORMAT"
18684 } else {
18685 "FORMAT_NUMBER"
18686 };
18687 Ok(Expression::Function(Box::new(Function::new(
18688 func_name.to_string(),
18689 vec![val_expr, expanded_fmt],
18690 ))))
18691 }
18692 _ => {
18693 // For TSQL and other targets, expand shortcodes but keep FORMAT
18694 if is_shortcode {
18695 Ok(Expression::Function(Box::new(Function::new(
18696 "FORMAT".to_string(),
18697 vec![val_expr, expanded_fmt],
18698 ))))
18699 } else {
18700 Ok(Expression::Function(f))
18701 }
18702 }
18703 }
18704 }
18705 // FORMAT('%s', x) from Trino/Presto -> target-specific
18706 "FORMAT"
18707 if f.args.len() >= 2
18708 && matches!(
18709 source,
18710 DialectType::Trino
18711 | DialectType::Presto
18712 | DialectType::Athena
18713 ) =>
18714 {
18715 let fmt_expr = f.args[0].clone();
18716 let value_args: Vec<Expression> = f.args[1..].to_vec();
18717 match target {
18718 // DuckDB: replace %s with {} in format string
18719 DialectType::DuckDB => {
18720 let new_fmt = match &fmt_expr {
18721 Expression::Literal(lit)
18722 if matches!(lit.as_ref(), Literal::String(_)) =>
18723 {
18724 let Literal::String(s) = lit.as_ref() else {
18725 unreachable!()
18726 };
18727 Expression::Literal(Box::new(Literal::String(
18728 s.replace("%s", "{}"),
18729 )))
18730 }
18731 _ => fmt_expr,
18732 };
18733 let mut args = vec![new_fmt];
18734 args.extend(value_args);
18735 Ok(Expression::Function(Box::new(Function::new(
18736 "FORMAT".to_string(),
18737 args,
18738 ))))
18739 }
18740 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
18741 DialectType::Snowflake => match &fmt_expr {
18742 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
18743 {
18744 let Literal::String(_) = lit.as_ref() else {
18745 unreachable!()
18746 };
18747 Ok(Expression::Function(Box::new(Function::new(
18748 "TO_CHAR".to_string(),
18749 value_args,
18750 ))))
18751 }
18752 _ => Ok(Expression::Function(f)),
18753 },
18754 // Default: keep FORMAT as-is
18755 _ => Ok(Expression::Function(f)),
18756 }
18757 }
18758 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
18759 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
18760 if f.args.len() == 2 =>
18761 {
18762 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
18763 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
18764 if matches!(target, DialectType::DuckDB)
18765 && matches!(source, DialectType::Snowflake)
18766 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
18767 {
18768 let value = f.args[0].clone();
18769 let array = f.args[1].clone();
18770
18771 // value IS NULL
18772 let value_is_null =
18773 Expression::IsNull(Box::new(crate::expressions::IsNull {
18774 this: value.clone(),
18775 not: false,
18776 postfix_form: false,
18777 }));
18778
18779 // ARRAY_LENGTH(array)
18780 let array_length =
18781 Expression::Function(Box::new(Function::new(
18782 "ARRAY_LENGTH".to_string(),
18783 vec![array.clone()],
18784 )));
18785 // LIST_COUNT(array)
18786 let list_count = Expression::Function(Box::new(Function::new(
18787 "LIST_COUNT".to_string(),
18788 vec![array.clone()],
18789 )));
18790 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
18791 let neq =
18792 Expression::Neq(Box::new(crate::expressions::BinaryOp {
18793 left: array_length,
18794 right: list_count,
18795 left_comments: vec![],
18796 operator_comments: vec![],
18797 trailing_comments: vec![],
18798 inferred_type: None,
18799 }));
18800 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
18801 let nullif =
18802 Expression::Nullif(Box::new(crate::expressions::Nullif {
18803 this: Box::new(neq),
18804 expression: Box::new(Expression::Boolean(
18805 crate::expressions::BooleanLiteral { value: false },
18806 )),
18807 }));
18808
18809 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
18810 let array_contains =
18811 Expression::Function(Box::new(Function::new(
18812 "ARRAY_CONTAINS".to_string(),
18813 vec![array, value],
18814 )));
18815
18816 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
18817 return Ok(Expression::Case(Box::new(Case {
18818 operand: None,
18819 whens: vec![(value_is_null, nullif)],
18820 else_: Some(array_contains),
18821 comments: Vec::new(),
18822 inferred_type: None,
18823 })));
18824 }
18825 match target {
18826 DialectType::PostgreSQL | DialectType::Redshift => {
18827 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
18828 let arr = f.args[0].clone();
18829 let needle = f.args[1].clone();
18830 // Convert [] to ARRAY[] for PostgreSQL
18831 let pg_arr = match arr {
18832 Expression::Array(a) => Expression::ArrayFunc(
18833 Box::new(crate::expressions::ArrayConstructor {
18834 expressions: a.expressions,
18835 bracket_notation: false,
18836 use_list_keyword: false,
18837 }),
18838 ),
18839 _ => arr,
18840 };
18841 // needle = ANY(arr) using the Any quantified expression
18842 let any_expr = Expression::Any(Box::new(
18843 crate::expressions::QuantifiedExpr {
18844 this: needle.clone(),
18845 subquery: pg_arr,
18846 op: Some(crate::expressions::QuantifiedOp::Eq),
18847 },
18848 ));
18849 let coalesce = Expression::Coalesce(Box::new(
18850 crate::expressions::VarArgFunc {
18851 expressions: vec![
18852 any_expr,
18853 Expression::Boolean(
18854 crate::expressions::BooleanLiteral {
18855 value: false,
18856 },
18857 ),
18858 ],
18859 original_name: None,
18860 inferred_type: None,
18861 },
18862 ));
18863 let is_null_check = Expression::IsNull(Box::new(
18864 crate::expressions::IsNull {
18865 this: needle,
18866 not: false,
18867 postfix_form: false,
18868 },
18869 ));
18870 Ok(Expression::Case(Box::new(Case {
18871 operand: None,
18872 whens: vec![(
18873 is_null_check,
18874 Expression::Null(crate::expressions::Null),
18875 )],
18876 else_: Some(coalesce),
18877 comments: Vec::new(),
18878 inferred_type: None,
18879 })))
18880 }
18881 _ => Ok(Expression::Function(Box::new(Function::new(
18882 "ARRAY_CONTAINS".to_string(),
18883 f.args,
18884 )))),
18885 }
18886 }
18887 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
18888 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
18889 match target {
18890 DialectType::PostgreSQL | DialectType::Redshift => {
18891 // arr1 && arr2 with ARRAY[] syntax
18892 let mut args = f.args;
18893 let arr1 = args.remove(0);
18894 let arr2 = args.remove(0);
18895 let pg_arr1 = match arr1 {
18896 Expression::Array(a) => Expression::ArrayFunc(
18897 Box::new(crate::expressions::ArrayConstructor {
18898 expressions: a.expressions,
18899 bracket_notation: false,
18900 use_list_keyword: false,
18901 }),
18902 ),
18903 _ => arr1,
18904 };
18905 let pg_arr2 = match arr2 {
18906 Expression::Array(a) => Expression::ArrayFunc(
18907 Box::new(crate::expressions::ArrayConstructor {
18908 expressions: a.expressions,
18909 bracket_notation: false,
18910 use_list_keyword: false,
18911 }),
18912 ),
18913 _ => arr2,
18914 };
18915 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18916 pg_arr1, pg_arr2,
18917 ))))
18918 }
18919 DialectType::DuckDB => {
18920 // DuckDB: arr1 && arr2 (native support)
18921 let mut args = f.args;
18922 let arr1 = args.remove(0);
18923 let arr2 = args.remove(0);
18924 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18925 arr1, arr2,
18926 ))))
18927 }
18928 _ => Ok(Expression::Function(Box::new(Function::new(
18929 "LIST_HAS_ANY".to_string(),
18930 f.args,
18931 )))),
18932 }
18933 }
18934 // APPROX_QUANTILE(x, q) -> target-specific
18935 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
18936 DialectType::Snowflake => Ok(Expression::Function(Box::new(
18937 Function::new("APPROX_PERCENTILE".to_string(), f.args),
18938 ))),
18939 DialectType::DuckDB => Ok(Expression::Function(f)),
18940 _ => Ok(Expression::Function(f)),
18941 },
18942 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
18943 "MAKE_DATE" if f.args.len() == 3 => match target {
18944 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18945 Function::new("DATE".to_string(), f.args),
18946 ))),
18947 _ => Ok(Expression::Function(f)),
18948 },
18949 // RANGE(start, end[, step]) -> target-specific
18950 "RANGE"
18951 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
18952 {
18953 let start = f.args[0].clone();
18954 let end = f.args[1].clone();
18955 let step = f.args.get(2).cloned();
18956 match target {
18957 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
18958 // so just rename without adjusting the end argument.
18959 DialectType::Snowflake => {
18960 let mut args = vec![start, end];
18961 if let Some(s) = step {
18962 args.push(s);
18963 }
18964 Ok(Expression::Function(Box::new(Function::new(
18965 "ARRAY_GENERATE_RANGE".to_string(),
18966 args,
18967 ))))
18968 }
18969 DialectType::Spark | DialectType::Databricks => {
18970 // RANGE(start, end) -> SEQUENCE(start, end-1)
18971 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
18972 // RANGE(start, start) -> ARRAY() (empty)
18973 // RANGE(start, end, 0) -> ARRAY() (empty)
18974 // When end is variable: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
18975
18976 // Check for constant args
18977 fn extract_i64(e: &Expression) -> Option<i64> {
18978 match e {
18979 Expression::Literal(lit)
18980 if matches!(
18981 lit.as_ref(),
18982 Literal::Number(_)
18983 ) =>
18984 {
18985 let Literal::Number(n) = lit.as_ref() else {
18986 unreachable!()
18987 };
18988 n.parse::<i64>().ok()
18989 }
18990 Expression::Neg(u) => {
18991 if let Expression::Literal(lit) = &u.this {
18992 if let Literal::Number(n) = lit.as_ref() {
18993 n.parse::<i64>().ok().map(|v| -v)
18994 } else {
18995 None
18996 }
18997 } else {
18998 None
18999 }
19000 }
19001 _ => None,
19002 }
19003 }
19004 let start_val = extract_i64(&start);
19005 let end_val = extract_i64(&end);
19006 let step_val = step.as_ref().and_then(|s| extract_i64(s));
19007
19008 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
19009 if step_val == Some(0) {
19010 return Ok(Expression::Function(Box::new(
19011 Function::new("ARRAY".to_string(), vec![]),
19012 )));
19013 }
19014 if let (Some(s), Some(e_val)) = (start_val, end_val) {
19015 if s == e_val {
19016 return Ok(Expression::Function(Box::new(
19017 Function::new("ARRAY".to_string(), vec![]),
19018 )));
19019 }
19020 }
19021
19022 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
19023 // All constants - compute new end = end - step (if step provided) or end - 1
19024 match step_val {
19025 Some(st) if st < 0 => {
19026 // Negative step: SEQUENCE(start, end - step, step)
19027 let new_end = e_val - st; // end - step (= end + |step|)
19028 let mut args =
19029 vec![start, Expression::number(new_end)];
19030 if let Some(s) = step {
19031 args.push(s);
19032 }
19033 Ok(Expression::Function(Box::new(
19034 Function::new("SEQUENCE".to_string(), args),
19035 )))
19036 }
19037 Some(st) => {
19038 let new_end = e_val - st;
19039 let mut args =
19040 vec![start, Expression::number(new_end)];
19041 if let Some(s) = step {
19042 args.push(s);
19043 }
19044 Ok(Expression::Function(Box::new(
19045 Function::new("SEQUENCE".to_string(), args),
19046 )))
19047 }
19048 None => {
19049 // No step: SEQUENCE(start, end - 1)
19050 let new_end = e_val - 1;
19051 Ok(Expression::Function(Box::new(
19052 Function::new(
19053 "SEQUENCE".to_string(),
19054 vec![
19055 start,
19056 Expression::number(new_end),
19057 ],
19058 ),
19059 )))
19060 }
19061 }
19062 } else {
19063 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
19064 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
19065 end.clone(),
19066 Expression::number(1),
19067 )));
19068 let cond = Expression::Lt(Box::new(BinaryOp::new(
19069 Expression::Paren(Box::new(Paren {
19070 this: end_m1.clone(),
19071 trailing_comments: Vec::new(),
19072 })),
19073 start.clone(),
19074 )));
19075 let empty = Expression::Function(Box::new(
19076 Function::new("ARRAY".to_string(), vec![]),
19077 ));
19078 let mut seq_args = vec![
19079 start,
19080 Expression::Paren(Box::new(Paren {
19081 this: end_m1,
19082 trailing_comments: Vec::new(),
19083 })),
19084 ];
19085 if let Some(s) = step {
19086 seq_args.push(s);
19087 }
19088 let seq = Expression::Function(Box::new(
19089 Function::new("SEQUENCE".to_string(), seq_args),
19090 ));
19091 Ok(Expression::IfFunc(Box::new(
19092 crate::expressions::IfFunc {
19093 condition: cond,
19094 true_value: empty,
19095 false_value: Some(seq),
19096 original_name: None,
19097 inferred_type: None,
19098 },
19099 )))
19100 }
19101 }
19102 DialectType::SQLite => {
19103 // RANGE(start, end) -> GENERATE_SERIES(start, end)
19104 // The subquery wrapping is handled at the Alias level
19105 let mut args = vec![start, end];
19106 if let Some(s) = step {
19107 args.push(s);
19108 }
19109 Ok(Expression::Function(Box::new(Function::new(
19110 "GENERATE_SERIES".to_string(),
19111 args,
19112 ))))
19113 }
19114 _ => Ok(Expression::Function(f)),
19115 }
19116 }
19117 // ARRAY_REVERSE_SORT -> target-specific
19118 // (handled above as well, but also need DuckDB self-normalization)
19119 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
19120 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
19121 DialectType::Snowflake => Ok(Expression::Function(Box::new(
19122 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
19123 ))),
19124 DialectType::Spark | DialectType::Databricks => {
19125 Ok(Expression::Function(Box::new(Function::new(
19126 "MAP_FROM_ARRAYS".to_string(),
19127 f.args,
19128 ))))
19129 }
19130 _ => Ok(Expression::Function(Box::new(Function::new(
19131 "MAP".to_string(),
19132 f.args,
19133 )))),
19134 },
19135 // VARIANCE(x) -> varSamp(x) for ClickHouse
19136 "VARIANCE" if f.args.len() == 1 => match target {
19137 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
19138 Function::new("varSamp".to_string(), f.args),
19139 ))),
19140 _ => Ok(Expression::Function(f)),
19141 },
19142 // STDDEV(x) -> stddevSamp(x) for ClickHouse
19143 "STDDEV" if f.args.len() == 1 => match target {
19144 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
19145 Function::new("stddevSamp".to_string(), f.args),
19146 ))),
19147 _ => Ok(Expression::Function(f)),
19148 },
19149 // ISINF(x) -> IS_INF(x) for BigQuery
19150 "ISINF" if f.args.len() == 1 => match target {
19151 DialectType::BigQuery => Ok(Expression::Function(Box::new(
19152 Function::new("IS_INF".to_string(), f.args),
19153 ))),
19154 _ => Ok(Expression::Function(f)),
19155 },
19156 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
19157 "CONTAINS" if f.args.len() == 2 => match target {
19158 DialectType::Spark
19159 | DialectType::Databricks
19160 | DialectType::Hive => Ok(Expression::Function(Box::new(
19161 Function::new("ARRAY_CONTAINS".to_string(), f.args),
19162 ))),
19163 _ => Ok(Expression::Function(f)),
19164 },
19165 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
19166 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
19167 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
19168 Ok(Expression::Function(Box::new(Function::new(
19169 "CONTAINS".to_string(),
19170 f.args,
19171 ))))
19172 }
19173 DialectType::DuckDB => Ok(Expression::Function(Box::new(
19174 Function::new("ARRAY_CONTAINS".to_string(), f.args),
19175 ))),
19176 _ => Ok(Expression::Function(f)),
19177 },
19178 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
19179 "TO_UNIXTIME" if f.args.len() == 1 => match target {
19180 DialectType::Hive
19181 | DialectType::Spark
19182 | DialectType::Databricks => Ok(Expression::Function(Box::new(
19183 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
19184 ))),
19185 _ => Ok(Expression::Function(f)),
19186 },
19187 // FROM_UNIXTIME(x) -> target-specific
19188 "FROM_UNIXTIME" if f.args.len() == 1 => {
19189 match target {
19190 DialectType::Hive
19191 | DialectType::Spark
19192 | DialectType::Databricks
19193 | DialectType::Presto
19194 | DialectType::Trino => Ok(Expression::Function(f)),
19195 DialectType::DuckDB => {
19196 // DuckDB: TO_TIMESTAMP(x)
19197 let arg = f.args.into_iter().next().unwrap();
19198 Ok(Expression::Function(Box::new(Function::new(
19199 "TO_TIMESTAMP".to_string(),
19200 vec![arg],
19201 ))))
19202 }
19203 DialectType::PostgreSQL => {
19204 // PG: TO_TIMESTAMP(col)
19205 let arg = f.args.into_iter().next().unwrap();
19206 Ok(Expression::Function(Box::new(Function::new(
19207 "TO_TIMESTAMP".to_string(),
19208 vec![arg],
19209 ))))
19210 }
19211 DialectType::Redshift => {
19212 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
19213 let arg = f.args.into_iter().next().unwrap();
19214 let epoch_ts = Expression::Literal(Box::new(
19215 Literal::Timestamp("epoch".to_string()),
19216 ));
19217 let interval = Expression::Interval(Box::new(
19218 crate::expressions::Interval {
19219 this: Some(Expression::string("1 SECOND")),
19220 unit: None,
19221 },
19222 ));
19223 let mul =
19224 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
19225 let add =
19226 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
19227 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
19228 this: add,
19229 trailing_comments: Vec::new(),
19230 })))
19231 }
19232 _ => Ok(Expression::Function(f)),
19233 }
19234 }
19235 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
19236 "FROM_UNIXTIME"
19237 if f.args.len() == 2
19238 && matches!(
19239 source,
19240 DialectType::Hive
19241 | DialectType::Spark
19242 | DialectType::Databricks
19243 ) =>
19244 {
19245 let mut args = f.args;
19246 let unix_ts = args.remove(0);
19247 let fmt_expr = args.remove(0);
19248 match target {
19249 DialectType::DuckDB => {
19250 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
19251 let to_ts = Expression::Function(Box::new(Function::new(
19252 "TO_TIMESTAMP".to_string(),
19253 vec![unix_ts],
19254 )));
19255 if let Expression::Literal(lit) = &fmt_expr {
19256 if let crate::expressions::Literal::String(s) =
19257 lit.as_ref()
19258 {
19259 let c_fmt = Self::hive_format_to_c_format(s);
19260 Ok(Expression::Function(Box::new(Function::new(
19261 "STRFTIME".to_string(),
19262 vec![to_ts, Expression::string(&c_fmt)],
19263 ))))
19264 } else {
19265 Ok(Expression::Function(Box::new(Function::new(
19266 "STRFTIME".to_string(),
19267 vec![to_ts, fmt_expr],
19268 ))))
19269 }
19270 } else {
19271 Ok(Expression::Function(Box::new(Function::new(
19272 "STRFTIME".to_string(),
19273 vec![to_ts, fmt_expr],
19274 ))))
19275 }
19276 }
19277 DialectType::Presto
19278 | DialectType::Trino
19279 | DialectType::Athena => {
19280 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
19281 let from_unix =
19282 Expression::Function(Box::new(Function::new(
19283 "FROM_UNIXTIME".to_string(),
19284 vec![unix_ts],
19285 )));
19286 if let Expression::Literal(lit) = &fmt_expr {
19287 if let crate::expressions::Literal::String(s) =
19288 lit.as_ref()
19289 {
19290 let p_fmt = Self::hive_format_to_presto_format(s);
19291 Ok(Expression::Function(Box::new(Function::new(
19292 "DATE_FORMAT".to_string(),
19293 vec![from_unix, Expression::string(&p_fmt)],
19294 ))))
19295 } else {
19296 Ok(Expression::Function(Box::new(Function::new(
19297 "DATE_FORMAT".to_string(),
19298 vec![from_unix, fmt_expr],
19299 ))))
19300 }
19301 } else {
19302 Ok(Expression::Function(Box::new(Function::new(
19303 "DATE_FORMAT".to_string(),
19304 vec![from_unix, fmt_expr],
19305 ))))
19306 }
19307 }
19308 _ => {
19309 // Keep as FROM_UNIXTIME(x, fmt) for other targets
19310 Ok(Expression::Function(Box::new(Function::new(
19311 "FROM_UNIXTIME".to_string(),
19312 vec![unix_ts, fmt_expr],
19313 ))))
19314 }
19315 }
19316 }
19317 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
19318 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
19319 let unit_str = Self::get_unit_str_static(&f.args[0]);
19320 // Get the raw unit text preserving original case
19321 let raw_unit = match &f.args[0] {
19322 Expression::Identifier(id) => id.name.clone(),
19323 Expression::Var(v) => v.this.clone(),
19324 Expression::Literal(lit)
19325 if matches!(
19326 lit.as_ref(),
19327 crate::expressions::Literal::String(_)
19328 ) =>
19329 {
19330 let crate::expressions::Literal::String(s) = lit.as_ref()
19331 else {
19332 unreachable!()
19333 };
19334 s.clone()
19335 }
19336 Expression::Column(col) => col.name.name.clone(),
19337 _ => unit_str.clone(),
19338 };
19339 match target {
19340 DialectType::TSQL | DialectType::Fabric => {
19341 // Preserve original case of unit for TSQL
19342 let unit_name = match unit_str.as_str() {
19343 "YY" | "YYYY" => "YEAR".to_string(),
19344 "QQ" | "Q" => "QUARTER".to_string(),
19345 "MM" | "M" => "MONTH".to_string(),
19346 "WK" | "WW" => "WEEK".to_string(),
19347 "DD" | "D" | "DY" => "DAY".to_string(),
19348 "HH" => "HOUR".to_string(),
19349 "MI" | "N" => "MINUTE".to_string(),
19350 "SS" | "S" => "SECOND".to_string(),
19351 _ => raw_unit.clone(), // preserve original case
19352 };
19353 let mut args = f.args;
19354 args[0] =
19355 Expression::Identifier(Identifier::new(&unit_name));
19356 Ok(Expression::Function(Box::new(Function::new(
19357 "DATEPART".to_string(),
19358 args,
19359 ))))
19360 }
19361 DialectType::Spark | DialectType::Databricks => {
19362 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
19363 // Preserve original case for non-abbreviation units
19364 let unit = match unit_str.as_str() {
19365 "YY" | "YYYY" => "YEAR".to_string(),
19366 "QQ" | "Q" => "QUARTER".to_string(),
19367 "MM" | "M" => "MONTH".to_string(),
19368 "WK" | "WW" => "WEEK".to_string(),
19369 "DD" | "D" | "DY" => "DAY".to_string(),
19370 "HH" => "HOUR".to_string(),
19371 "MI" | "N" => "MINUTE".to_string(),
19372 "SS" | "S" => "SECOND".to_string(),
19373 _ => raw_unit, // preserve original case
19374 };
19375 Ok(Expression::Extract(Box::new(
19376 crate::expressions::ExtractFunc {
19377 this: f.args[1].clone(),
19378 field: crate::expressions::DateTimeField::Custom(
19379 unit,
19380 ),
19381 },
19382 )))
19383 }
19384 _ => Ok(Expression::Function(Box::new(Function::new(
19385 "DATE_PART".to_string(),
19386 f.args,
19387 )))),
19388 }
19389 }
19390 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
19391 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
19392 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
19393 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
19394 "DATENAME" if f.args.len() == 2 => {
19395 let unit_str = Self::get_unit_str_static(&f.args[0]);
19396 let date_expr = f.args[1].clone();
19397 match unit_str.as_str() {
19398 "MM" | "M" | "MONTH" => match target {
19399 DialectType::TSQL => {
19400 let cast_date = Expression::Cast(Box::new(
19401 crate::expressions::Cast {
19402 this: date_expr,
19403 to: DataType::Custom {
19404 name: "DATETIME2".to_string(),
19405 },
19406 trailing_comments: Vec::new(),
19407 double_colon_syntax: false,
19408 format: None,
19409 default: None,
19410 inferred_type: None,
19411 },
19412 ));
19413 Ok(Expression::Function(Box::new(Function::new(
19414 "FORMAT".to_string(),
19415 vec![cast_date, Expression::string("MMMM")],
19416 ))))
19417 }
19418 DialectType::Spark | DialectType::Databricks => {
19419 let cast_date = Expression::Cast(Box::new(
19420 crate::expressions::Cast {
19421 this: date_expr,
19422 to: DataType::Timestamp {
19423 timezone: false,
19424 precision: None,
19425 },
19426 trailing_comments: Vec::new(),
19427 double_colon_syntax: false,
19428 format: None,
19429 default: None,
19430 inferred_type: None,
19431 },
19432 ));
19433 Ok(Expression::Function(Box::new(Function::new(
19434 "DATE_FORMAT".to_string(),
19435 vec![cast_date, Expression::string("MMMM")],
19436 ))))
19437 }
19438 _ => Ok(Expression::Function(f)),
19439 },
19440 "DW" | "WEEKDAY" => match target {
19441 DialectType::TSQL => {
19442 let cast_date = Expression::Cast(Box::new(
19443 crate::expressions::Cast {
19444 this: date_expr,
19445 to: DataType::Custom {
19446 name: "DATETIME2".to_string(),
19447 },
19448 trailing_comments: Vec::new(),
19449 double_colon_syntax: false,
19450 format: None,
19451 default: None,
19452 inferred_type: None,
19453 },
19454 ));
19455 Ok(Expression::Function(Box::new(Function::new(
19456 "FORMAT".to_string(),
19457 vec![cast_date, Expression::string("dddd")],
19458 ))))
19459 }
19460 DialectType::Spark | DialectType::Databricks => {
19461 let cast_date = Expression::Cast(Box::new(
19462 crate::expressions::Cast {
19463 this: date_expr,
19464 to: DataType::Timestamp {
19465 timezone: false,
19466 precision: None,
19467 },
19468 trailing_comments: Vec::new(),
19469 double_colon_syntax: false,
19470 format: None,
19471 default: None,
19472 inferred_type: None,
19473 },
19474 ));
19475 Ok(Expression::Function(Box::new(Function::new(
19476 "DATE_FORMAT".to_string(),
19477 vec![cast_date, Expression::string("EEEE")],
19478 ))))
19479 }
19480 _ => Ok(Expression::Function(f)),
19481 },
19482 _ => Ok(Expression::Function(f)),
19483 }
19484 }
19485 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
19486 "STRING_AGG" if f.args.len() >= 2 => {
19487 let x = f.args[0].clone();
19488 let sep = f.args[1].clone();
19489 match target {
19490 DialectType::MySQL
19491 | DialectType::SingleStore
19492 | DialectType::Doris
19493 | DialectType::StarRocks => Ok(Expression::GroupConcat(
19494 Box::new(crate::expressions::GroupConcatFunc {
19495 this: x,
19496 separator: Some(sep),
19497 order_by: None,
19498 distinct: false,
19499 filter: None,
19500 limit: None,
19501 inferred_type: None,
19502 }),
19503 )),
19504 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
19505 crate::expressions::GroupConcatFunc {
19506 this: x,
19507 separator: Some(sep),
19508 order_by: None,
19509 distinct: false,
19510 filter: None,
19511 limit: None,
19512 inferred_type: None,
19513 },
19514 ))),
19515 DialectType::PostgreSQL | DialectType::Redshift => {
19516 Ok(Expression::StringAgg(Box::new(
19517 crate::expressions::StringAggFunc {
19518 this: x,
19519 separator: Some(sep),
19520 order_by: None,
19521 distinct: false,
19522 filter: None,
19523 limit: None,
19524 inferred_type: None,
19525 },
19526 )))
19527 }
19528 _ => Ok(Expression::Function(f)),
19529 }
19530 }
19531 "TRY_DIVIDE" if f.args.len() == 2 => {
19532 let mut args = f.args;
19533 let x = args.remove(0);
19534 let y = args.remove(0);
19535 match target {
19536 DialectType::Spark | DialectType::Databricks => {
19537 Ok(Expression::Function(Box::new(Function::new(
19538 "TRY_DIVIDE".to_string(),
19539 vec![x, y],
19540 ))))
19541 }
19542 DialectType::Snowflake => {
19543 let y_ref = match &y {
19544 Expression::Column(_)
19545 | Expression::Literal(_)
19546 | Expression::Identifier(_) => y.clone(),
19547 _ => Expression::Paren(Box::new(Paren {
19548 this: y.clone(),
19549 trailing_comments: vec![],
19550 })),
19551 };
19552 let x_ref = match &x {
19553 Expression::Column(_)
19554 | Expression::Literal(_)
19555 | Expression::Identifier(_) => x.clone(),
19556 _ => Expression::Paren(Box::new(Paren {
19557 this: x.clone(),
19558 trailing_comments: vec![],
19559 })),
19560 };
19561 let condition = Expression::Neq(Box::new(
19562 crate::expressions::BinaryOp::new(
19563 y_ref.clone(),
19564 Expression::number(0),
19565 ),
19566 ));
19567 let div_expr = Expression::Div(Box::new(
19568 crate::expressions::BinaryOp::new(x_ref, y_ref),
19569 ));
19570 Ok(Expression::IfFunc(Box::new(
19571 crate::expressions::IfFunc {
19572 condition,
19573 true_value: div_expr,
19574 false_value: Some(Expression::Null(Null)),
19575 original_name: Some("IFF".to_string()),
19576 inferred_type: None,
19577 },
19578 )))
19579 }
19580 DialectType::DuckDB => {
19581 let y_ref = match &y {
19582 Expression::Column(_)
19583 | Expression::Literal(_)
19584 | Expression::Identifier(_) => y.clone(),
19585 _ => Expression::Paren(Box::new(Paren {
19586 this: y.clone(),
19587 trailing_comments: vec![],
19588 })),
19589 };
19590 let x_ref = match &x {
19591 Expression::Column(_)
19592 | Expression::Literal(_)
19593 | Expression::Identifier(_) => x.clone(),
19594 _ => Expression::Paren(Box::new(Paren {
19595 this: x.clone(),
19596 trailing_comments: vec![],
19597 })),
19598 };
19599 let condition = Expression::Neq(Box::new(
19600 crate::expressions::BinaryOp::new(
19601 y_ref.clone(),
19602 Expression::number(0),
19603 ),
19604 ));
19605 let div_expr = Expression::Div(Box::new(
19606 crate::expressions::BinaryOp::new(x_ref, y_ref),
19607 ));
19608 Ok(Expression::Case(Box::new(Case {
19609 operand: None,
19610 whens: vec![(condition, div_expr)],
19611 else_: Some(Expression::Null(Null)),
19612 comments: Vec::new(),
19613 inferred_type: None,
19614 })))
19615 }
19616 _ => Ok(Expression::Function(Box::new(Function::new(
19617 "TRY_DIVIDE".to_string(),
19618 vec![x, y],
19619 )))),
19620 }
19621 }
19622 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
19623 "JSON_ARRAYAGG" => match target {
19624 DialectType::PostgreSQL => {
19625 Ok(Expression::Function(Box::new(Function {
19626 name: "JSON_AGG".to_string(),
19627 ..(*f)
19628 })))
19629 }
19630 _ => Ok(Expression::Function(f)),
19631 },
19632 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
19633 "SCHEMA_NAME" => match target {
19634 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
19635 crate::expressions::CurrentSchema { this: None },
19636 ))),
19637 DialectType::SQLite => Ok(Expression::string("main")),
19638 _ => Ok(Expression::Function(f)),
19639 },
19640 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
19641 "TO_TIMESTAMP"
19642 if f.args.len() == 2
19643 && matches!(
19644 source,
19645 DialectType::Spark
19646 | DialectType::Databricks
19647 | DialectType::Hive
19648 )
19649 && matches!(target, DialectType::DuckDB) =>
19650 {
19651 let mut args = f.args;
19652 let val = args.remove(0);
19653 let fmt_expr = args.remove(0);
19654 if let Expression::Literal(ref lit) = fmt_expr {
19655 if let Literal::String(ref s) = lit.as_ref() {
19656 // Convert Java/Spark format to C strptime format
/// Converts a Java/Spark datetime pattern into C `strptime`-style directives.
///
/// Recognised pattern tokens: `yyyy`, `yy`, `MM`, `dd`, `HH`, `mm`, `ss`,
/// `SSSSSS`, `EEEE`, `z` and `Z`. Every other character is copied through
/// unchanged. A `%` already present in the input is copied verbatim together
/// with the character that follows it, so it is never rewritten.
///
/// The pattern is scanned once from left to right. Text emitted for one token
/// is therefore never re-scanned as input for another token (with chained
/// `str::replace` calls, `"MMmm"` would turn into `"%%Mm"` instead of `"%m%M"`).
fn java_to_c_fmt(fmt: &str) -> String {
    // Tried in order at each position; "yyyy" must precede "yy" so the longer
    // token wins. `EEEE` is the full weekday name, which is `%A` (`%W` is the
    // week-of-year number).
    const TOKENS: [(&str, &str); 11] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%A"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
        ("z", "%Z"),
        ("Z", "%z"),
    ];

    let mut out = String::with_capacity(fmt.len() + 8);
    let mut rest = fmt;
    while let Some(ch) = rest.chars().next() {
        if ch == '%' {
            // Keep an existing directive (or a trailing lone '%') exactly as written.
            let mut tail = rest.chars();
            tail.next();
            out.push('%');
            if let Some(following) = tail.next() {
                out.push(following);
            }
            rest = tail.as_str();
            continue;
        }
        match TOKENS.iter().find(|(token, _)| rest.starts_with(*token)) {
            Some((token, directive)) => {
                out.push_str(directive);
                rest = &rest[token.len()..];
            }
            None => {
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
19689 let c_fmt = java_to_c_fmt(s);
19690 Ok(Expression::Function(Box::new(Function::new(
19691 "STRPTIME".to_string(),
19692 vec![val, Expression::string(&c_fmt)],
19693 ))))
19694 } else {
19695 Ok(Expression::Function(Box::new(Function::new(
19696 "STRPTIME".to_string(),
19697 vec![val, fmt_expr],
19698 ))))
19699 }
19700 } else {
19701 Ok(Expression::Function(Box::new(Function::new(
19702 "STRPTIME".to_string(),
19703 vec![val, fmt_expr],
19704 ))))
19705 }
19706 }
19707 // TO_DATE(x) 1-arg from Doris: date conversion
19708 "TO_DATE"
19709 if f.args.len() == 1
19710 && matches!(
19711 source,
19712 DialectType::Doris | DialectType::StarRocks
19713 ) =>
19714 {
19715 let arg = f.args.into_iter().next().unwrap();
19716 match target {
19717 DialectType::Oracle
19718 | DialectType::DuckDB
19719 | DialectType::TSQL => {
19720 // CAST(x AS DATE)
19721 Ok(Expression::Cast(Box::new(Cast {
19722 this: arg,
19723 to: DataType::Date,
19724 double_colon_syntax: false,
19725 trailing_comments: vec![],
19726 format: None,
19727 default: None,
19728 inferred_type: None,
19729 })))
19730 }
19731 DialectType::MySQL | DialectType::SingleStore => {
19732 // DATE(x)
19733 Ok(Expression::Function(Box::new(Function::new(
19734 "DATE".to_string(),
19735 vec![arg],
19736 ))))
19737 }
19738 _ => {
19739 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
19740 Ok(Expression::Function(Box::new(Function::new(
19741 "TO_DATE".to_string(),
19742 vec![arg],
19743 ))))
19744 }
19745 }
19746 }
19747 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
19748 "TO_DATE"
19749 if f.args.len() == 1
19750 && matches!(
19751 source,
19752 DialectType::Spark
19753 | DialectType::Databricks
19754 | DialectType::Hive
19755 ) =>
19756 {
19757 let arg = f.args.into_iter().next().unwrap();
19758 match target {
19759 DialectType::DuckDB => {
19760 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
19761 Ok(Expression::TryCast(Box::new(Cast {
19762 this: arg,
19763 to: DataType::Date,
19764 double_colon_syntax: false,
19765 trailing_comments: vec![],
19766 format: None,
19767 default: None,
19768 inferred_type: None,
19769 })))
19770 }
19771 DialectType::Presto
19772 | DialectType::Trino
19773 | DialectType::Athena => {
19774 // CAST(CAST(x AS TIMESTAMP) AS DATE)
19775 Ok(Self::double_cast_timestamp_date(arg))
19776 }
19777 DialectType::Snowflake => {
19778 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
19779 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
19780 Ok(Expression::Function(Box::new(Function::new(
19781 "TRY_TO_DATE".to_string(),
19782 vec![arg, Expression::string("yyyy-mm-DD")],
19783 ))))
19784 }
19785 _ => {
19786 // Default: keep as TO_DATE(x)
19787 Ok(Expression::Function(Box::new(Function::new(
19788 "TO_DATE".to_string(),
19789 vec![arg],
19790 ))))
19791 }
19792 }
19793 }
19794 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
19795 "TO_DATE"
19796 if f.args.len() == 2
19797 && matches!(
19798 source,
19799 DialectType::Spark
19800 | DialectType::Databricks
19801 | DialectType::Hive
19802 ) =>
19803 {
19804 let mut args = f.args;
19805 let val = args.remove(0);
19806 let fmt_expr = args.remove(0);
19807 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
19808
19809 if is_default_format {
19810 // Default format: same as 1-arg form
19811 match target {
19812 DialectType::DuckDB => {
19813 Ok(Expression::TryCast(Box::new(Cast {
19814 this: val,
19815 to: DataType::Date,
19816 double_colon_syntax: false,
19817 trailing_comments: vec![],
19818 format: None,
19819 default: None,
19820 inferred_type: None,
19821 })))
19822 }
19823 DialectType::Presto
19824 | DialectType::Trino
19825 | DialectType::Athena => {
19826 Ok(Self::double_cast_timestamp_date(val))
19827 }
19828 DialectType::Snowflake => {
19829 // TRY_TO_DATE(x, format) with Snowflake format mapping
19830 let sf_fmt = "yyyy-MM-dd"
19831 .replace("yyyy", "yyyy")
19832 .replace("MM", "mm")
19833 .replace("dd", "DD");
19834 Ok(Expression::Function(Box::new(Function::new(
19835 "TRY_TO_DATE".to_string(),
19836 vec![val, Expression::string(&sf_fmt)],
19837 ))))
19838 }
19839 _ => Ok(Expression::Function(Box::new(Function::new(
19840 "TO_DATE".to_string(),
19841 vec![val],
19842 )))),
19843 }
19844 } else {
19845 // Non-default format: use format-based parsing
19846 if let Expression::Literal(ref lit) = fmt_expr {
19847 if let Literal::String(ref s) = lit.as_ref() {
19848 match target {
19849 DialectType::DuckDB => {
19850 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Translates a Java/Spark datetime pattern into a C `strptime`-style format string.
///
/// First the multi-letter Java tokens are substituted (`yyyy`→`%Y`, `SSSSSS`→`%f`,
/// `EEEE`→`%A`, `MM`→`%m`, `dd`→`%d`, `HH`→`%H`, `mm`→`%M`, `ss`→`%S`, `yy`→`%y`).
/// Then the text is scanned once more to map the zone letters `z`→`%Z` and `Z`→`%z`;
/// during that scan every existing `%x` pair is copied untouched so directives
/// emitted by the first step are not altered. All other characters pass through.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    // Longer tokens come before the shorter tokens they contain (`yyyy` before `yy`).
    // `EEEE` (full weekday name) corresponds to `%A`; `%W` is the week number in
    // strptime syntax and would make parsing of weekday names fail.
    let substitutions: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%A"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let mut converted = fmt.to_string();
    for &(java_token, c_directive) in substitutions.iter() {
        converted = converted.replace(java_token, c_directive);
    }

    let mut result = String::with_capacity(converted.len() + 2);
    let mut rest = converted.chars();
    while let Some(current) = rest.next() {
        if current == '%' {
            // Keep an existing directive intact (a lone trailing '%' is preserved).
            result.push(current);
            if let Some(following) = rest.next() {
                result.push(following);
            }
        } else if current == 'z' {
            // Java zone name -> C `%Z`.
            result.push_str("%Z");
        } else if current == 'Z' {
            // Java numeric zone offset -> C `%z`.
            result.push_str("%z");
        } else {
            result.push(current);
        }
    }
    result
}
19886 let c_fmt = java_to_c_fmt_todate(s);
19887 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
19888 let try_strptime = Expression::Function(
19889 Box::new(Function::new(
19890 "TRY_STRPTIME".to_string(),
19891 vec![val, Expression::string(&c_fmt)],
19892 )),
19893 );
19894 let cast_ts =
19895 Expression::Cast(Box::new(Cast {
19896 this: try_strptime,
19897 to: DataType::Timestamp {
19898 precision: None,
19899 timezone: false,
19900 },
19901 double_colon_syntax: false,
19902 trailing_comments: vec![],
19903 format: None,
19904 default: None,
19905 inferred_type: None,
19906 }));
19907 Ok(Expression::Cast(Box::new(Cast {
19908 this: cast_ts,
19909 to: DataType::Date,
19910 double_colon_syntax: false,
19911 trailing_comments: vec![],
19912 format: None,
19913 default: None,
19914 inferred_type: None,
19915 })))
19916 }
19917 DialectType::Presto
19918 | DialectType::Trino
19919 | DialectType::Athena => {
19920 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
19921 let p_fmt = s
19922 .replace("yyyy", "%Y")
19923 .replace("SSSSSS", "%f")
19924 .replace("MM", "%m")
19925 .replace("dd", "%d")
19926 .replace("HH", "%H")
19927 .replace("mm", "%M")
19928 .replace("ss", "%S")
19929 .replace("yy", "%y");
19930 let date_parse = Expression::Function(
19931 Box::new(Function::new(
19932 "DATE_PARSE".to_string(),
19933 vec![val, Expression::string(&p_fmt)],
19934 )),
19935 );
19936 Ok(Expression::Cast(Box::new(Cast {
19937 this: date_parse,
19938 to: DataType::Date,
19939 double_colon_syntax: false,
19940 trailing_comments: vec![],
19941 format: None,
19942 default: None,
19943 inferred_type: None,
19944 })))
19945 }
19946 DialectType::Snowflake => {
19947 // TRY_TO_DATE(x, fmt): the format literal is passed through unchanged (no Java -> Snowflake token mapping is applied here)
19948 Ok(Expression::Function(Box::new(
19949 Function::new(
19950 "TRY_TO_DATE".to_string(),
19951 vec![val, Expression::string(s)],
19952 ),
19953 )))
19954 }
19955 _ => Ok(Expression::Function(Box::new(
19956 Function::new(
19957 "TO_DATE".to_string(),
19958 vec![val, fmt_expr],
19959 ),
19960 ))),
19961 }
19962 } else {
19963 Ok(Expression::Function(Box::new(Function::new(
19964 "TO_DATE".to_string(),
19965 vec![val, fmt_expr],
19966 ))))
19967 }
19968 } else {
19969 Ok(Expression::Function(Box::new(Function::new(
19970 "TO_DATE".to_string(),
19971 vec![val, fmt_expr],
19972 ))))
19973 }
19974 }
19975 }
19976 // TO_TIMESTAMP(x) 1-arg: epoch conversion
19977 "TO_TIMESTAMP"
19978 if f.args.len() == 1
19979 && matches!(source, DialectType::DuckDB)
19980 && matches!(
19981 target,
19982 DialectType::BigQuery
19983 | DialectType::Presto
19984 | DialectType::Trino
19985 | DialectType::Hive
19986 | DialectType::Spark
19987 | DialectType::Databricks
19988 | DialectType::Athena
19989 ) =>
19990 {
19991 let arg = f.args.into_iter().next().unwrap();
19992 let func_name = match target {
19993 DialectType::BigQuery => "TIMESTAMP_SECONDS",
19994 DialectType::Presto
19995 | DialectType::Trino
19996 | DialectType::Athena
19997 | DialectType::Hive
19998 | DialectType::Spark
19999 | DialectType::Databricks => "FROM_UNIXTIME",
20000 _ => "TO_TIMESTAMP",
20001 };
20002 Ok(Expression::Function(Box::new(Function::new(
20003 func_name.to_string(),
20004 vec![arg],
20005 ))))
20006 }
20007 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
20008 "CONCAT" if f.args.len() == 1 => {
20009 let arg = f.args.into_iter().next().unwrap();
20010 match target {
20011 DialectType::Presto
20012 | DialectType::Trino
20013 | DialectType::Athena => {
20014 // CONCAT(a) -> CAST(a AS VARCHAR)
20015 Ok(Expression::Cast(Box::new(Cast {
20016 this: arg,
20017 to: DataType::VarChar {
20018 length: None,
20019 parenthesized_length: false,
20020 },
20021 trailing_comments: vec![],
20022 double_colon_syntax: false,
20023 format: None,
20024 default: None,
20025 inferred_type: None,
20026 })))
20027 }
20028 DialectType::TSQL => {
20029 // CONCAT(a) -> a
20030 Ok(arg)
20031 }
20032 DialectType::DuckDB => {
20033 // Keep CONCAT(a) for DuckDB (native support)
20034 Ok(Expression::Function(Box::new(Function::new(
20035 "CONCAT".to_string(),
20036 vec![arg],
20037 ))))
20038 }
20039 DialectType::Spark | DialectType::Databricks => {
20040 let coalesced = Expression::Coalesce(Box::new(
20041 crate::expressions::VarArgFunc {
20042 expressions: vec![arg, Expression::string("")],
20043 original_name: None,
20044 inferred_type: None,
20045 },
20046 ));
20047 Ok(Expression::Function(Box::new(Function::new(
20048 "CONCAT".to_string(),
20049 vec![coalesced],
20050 ))))
20051 }
20052 _ => Ok(Expression::Function(Box::new(Function::new(
20053 "CONCAT".to_string(),
20054 vec![arg],
20055 )))),
20056 }
20057 }
20058 // REGEXP_EXTRACT(a, p, group) 3-arg for BigQuery: default group is 0, so a literal 0 group argument is dropped
20059 "REGEXP_EXTRACT"
20060 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
20061 {
20062 // If group_index is 0, drop it
20063 let drop_group = match &f.args[2] {
20064 Expression::Literal(lit)
20065 if matches!(lit.as_ref(), Literal::Number(_)) =>
20066 {
20067 let Literal::Number(n) = lit.as_ref() else {
20068 unreachable!()
20069 };
20070 n == "0"
20071 }
20072 _ => false,
20073 };
20074 if drop_group {
20075 let mut args = f.args;
20076 args.truncate(2);
20077 Ok(Expression::Function(Box::new(Function::new(
20078 "REGEXP_EXTRACT".to_string(),
20079 args,
20080 ))))
20081 } else {
20082 Ok(Expression::Function(f))
20083 }
20084 }
20085 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
20086 "REGEXP_EXTRACT"
20087 if f.args.len() == 4
20088 && matches!(target, DialectType::Snowflake) =>
20089 {
20090 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
20091 let mut args = f.args;
20092 let this = args.remove(0);
20093 let pattern = args.remove(0);
20094 let group = args.remove(0);
20095 let flags = args.remove(0);
20096 Ok(Expression::Function(Box::new(Function::new(
20097 "REGEXP_SUBSTR".to_string(),
20098 vec![
20099 this,
20100 pattern,
20101 Expression::number(1),
20102 Expression::number(1),
20103 flags,
20104 group,
20105 ],
20106 ))))
20107 }
20108 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
20109 "REGEXP_SUBSTR"
20110 if f.args.len() == 3
20111 && matches!(
20112 target,
20113 DialectType::DuckDB
20114 | DialectType::Presto
20115 | DialectType::Trino
20116 | DialectType::Spark
20117 | DialectType::Databricks
20118 ) =>
20119 {
20120 let mut args = f.args;
20121 let this = args.remove(0);
20122 let pattern = args.remove(0);
20123 let position = args.remove(0);
20124 // Wrap subject in SUBSTRING(this, position) to apply the offset
20125 let substring_expr = Expression::Function(Box::new(Function::new(
20126 "SUBSTRING".to_string(),
20127 vec![this, position],
20128 )));
20129 let target_name = match target {
20130 DialectType::DuckDB => "REGEXP_EXTRACT",
20131 _ => "REGEXP_EXTRACT",
20132 };
20133 Ok(Expression::Function(Box::new(Function::new(
20134 target_name.to_string(),
20135 vec![substring_expr, pattern],
20136 ))))
20137 }
20138 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
20139 "TO_DAYS" if f.args.len() == 1 => {
20140 let x = f.args.into_iter().next().unwrap();
20141 let epoch = Expression::string("0000-01-01");
20142 // Build the final target-specific expression directly
20143 let datediff_expr = match target {
20144 DialectType::MySQL | DialectType::SingleStore => {
20145 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
20146 Expression::Function(Box::new(Function::new(
20147 "DATEDIFF".to_string(),
20148 vec![x, epoch],
20149 )))
20150 }
20151 DialectType::DuckDB => {
20152 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
20153 let cast_epoch = Expression::Cast(Box::new(Cast {
20154 this: epoch,
20155 to: DataType::Date,
20156 trailing_comments: Vec::new(),
20157 double_colon_syntax: false,
20158 format: None,
20159 default: None,
20160 inferred_type: None,
20161 }));
20162 let cast_x = Expression::Cast(Box::new(Cast {
20163 this: x,
20164 to: DataType::Date,
20165 trailing_comments: Vec::new(),
20166 double_colon_syntax: false,
20167 format: None,
20168 default: None,
20169 inferred_type: None,
20170 }));
20171 Expression::Function(Box::new(Function::new(
20172 "DATE_DIFF".to_string(),
20173 vec![Expression::string("DAY"), cast_epoch, cast_x],
20174 )))
20175 }
20176 DialectType::Presto
20177 | DialectType::Trino
20178 | DialectType::Athena => {
20179 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
20180 let cast_epoch = Self::double_cast_timestamp_date(epoch);
20181 let cast_x = Self::double_cast_timestamp_date(x);
20182 Expression::Function(Box::new(Function::new(
20183 "DATE_DIFF".to_string(),
20184 vec![Expression::string("DAY"), cast_epoch, cast_x],
20185 )))
20186 }
20187 _ => {
20188 // Default: (DATEDIFF(x, '0000-01-01') + 1)
20189 Expression::Function(Box::new(Function::new(
20190 "DATEDIFF".to_string(),
20191 vec![x, epoch],
20192 )))
20193 }
20194 };
20195 let add_one = Expression::Add(Box::new(BinaryOp::new(
20196 datediff_expr,
20197 Expression::number(1),
20198 )));
20199 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
20200 this: add_one,
20201 trailing_comments: Vec::new(),
20202 })))
20203 }
20204 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
20205 "STR_TO_DATE"
20206 if f.args.len() == 2
20207 && matches!(
20208 target,
20209 DialectType::Presto | DialectType::Trino
20210 ) =>
20211 {
20212 let mut args = f.args;
20213 let x = args.remove(0);
20214 let format_expr = args.remove(0);
20215 // Check if the format contains time components
20216 let has_time = if let Expression::Literal(ref lit) = format_expr {
20217 if let Literal::String(ref fmt) = lit.as_ref() {
20218 fmt.contains("%H")
20219 || fmt.contains("%T")
20220 || fmt.contains("%M")
20221 || fmt.contains("%S")
20222 || fmt.contains("%I")
20223 || fmt.contains("%p")
20224 } else {
20225 false
20226 }
20227 } else {
20228 false
20229 };
20230 let date_parse = Expression::Function(Box::new(Function::new(
20231 "DATE_PARSE".to_string(),
20232 vec![x, format_expr],
20233 )));
20234 if has_time {
20235 // Has time components: just DATE_PARSE
20236 Ok(date_parse)
20237 } else {
20238 // Date-only: CAST(DATE_PARSE(...) AS DATE)
20239 Ok(Expression::Cast(Box::new(Cast {
20240 this: date_parse,
20241 to: DataType::Date,
20242 trailing_comments: Vec::new(),
20243 double_colon_syntax: false,
20244 format: None,
20245 default: None,
20246 inferred_type: None,
20247 })))
20248 }
20249 }
20250 "STR_TO_DATE"
20251 if f.args.len() == 2
20252 && matches!(
20253 target,
20254 DialectType::PostgreSQL | DialectType::Redshift
20255 ) =>
20256 {
20257 let mut args = f.args;
20258 let x = args.remove(0);
20259 let fmt = args.remove(0);
20260 let pg_fmt = match fmt {
20261 Expression::Literal(lit)
20262 if matches!(lit.as_ref(), Literal::String(_)) =>
20263 {
20264 let Literal::String(s) = lit.as_ref() else {
20265 unreachable!()
20266 };
20267 Expression::string(
20268 &s.replace("%Y", "YYYY")
20269 .replace("%m", "MM")
20270 .replace("%d", "DD")
20271 .replace("%H", "HH24")
20272 .replace("%M", "MI")
20273 .replace("%S", "SS"),
20274 )
20275 }
20276 other => other,
20277 };
20278 let to_date = Expression::Function(Box::new(Function::new(
20279 "TO_DATE".to_string(),
20280 vec![x, pg_fmt],
20281 )));
20282 Ok(Expression::Cast(Box::new(Cast {
20283 this: to_date,
20284 to: DataType::Timestamp {
20285 timezone: false,
20286 precision: None,
20287 },
20288 trailing_comments: Vec::new(),
20289 double_colon_syntax: false,
20290 format: None,
20291 default: None,
20292 inferred_type: None,
20293 })))
20294 }
20295 // RANGE(start, end) -> GENERATE_SERIES for SQLite
20296 "RANGE"
20297 if (f.args.len() == 1 || f.args.len() == 2)
20298 && matches!(target, DialectType::SQLite) =>
20299 {
20300 if f.args.len() == 2 {
20301 // RANGE(start, end) -> GENERATE_SERIES(start, end)
20302 // NOTE: RANGE is exclusive on end while GENERATE_SERIES is inclusive; the end bound is passed through unchanged here
20303 let mut args = f.args;
20304 let start = args.remove(0);
20305 let end = args.remove(0);
20306 Ok(Expression::Function(Box::new(Function::new(
20307 "GENERATE_SERIES".to_string(),
20308 vec![start, end],
20309 ))))
20310 } else {
20311 Ok(Expression::Function(f))
20312 }
20313 }
20314 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
20315 // When source is Snowflake, keep as-is (args already in correct form)
20316 "UNIFORM"
20317 if matches!(target, DialectType::Snowflake)
20318 && (f.args.len() == 2 || f.args.len() == 3) =>
20319 {
20320 if matches!(source, DialectType::Snowflake) {
20321 // Snowflake -> Snowflake: keep as-is
20322 Ok(Expression::Function(f))
20323 } else {
20324 let mut args = f.args;
20325 let low = args.remove(0);
20326 let high = args.remove(0);
20327 let random = if !args.is_empty() {
20328 let seed = args.remove(0);
20329 Expression::Function(Box::new(Function::new(
20330 "RANDOM".to_string(),
20331 vec![seed],
20332 )))
20333 } else {
20334 Expression::Function(Box::new(Function::new(
20335 "RANDOM".to_string(),
20336 vec![],
20337 )))
20338 };
20339 Ok(Expression::Function(Box::new(Function::new(
20340 "UNIFORM".to_string(),
20341 vec![low, high, random],
20342 ))))
20343 }
20344 }
20345 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20346 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
20347 let mut args = f.args;
20348 let ts_arg = args.remove(0);
20349 let tz_arg = args.remove(0);
20350 // Cast string literal to TIMESTAMP for all targets
20351 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20352 {
20353 Expression::Cast(Box::new(Cast {
20354 this: ts_arg,
20355 to: DataType::Timestamp {
20356 timezone: false,
20357 precision: None,
20358 },
20359 trailing_comments: vec![],
20360 double_colon_syntax: false,
20361 format: None,
20362 default: None,
20363 inferred_type: None,
20364 }))
20365 } else {
20366 ts_arg
20367 };
20368 match target {
20369 DialectType::Spark | DialectType::Databricks => {
20370 Ok(Expression::Function(Box::new(Function::new(
20371 "TO_UTC_TIMESTAMP".to_string(),
20372 vec![ts_cast, tz_arg],
20373 ))))
20374 }
20375 DialectType::Snowflake => {
20376 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
20377 Ok(Expression::Function(Box::new(Function::new(
20378 "CONVERT_TIMEZONE".to_string(),
20379 vec![tz_arg, Expression::string("UTC"), ts_cast],
20380 ))))
20381 }
20382 DialectType::Presto
20383 | DialectType::Trino
20384 | DialectType::Athena => {
20385 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
20386 let wtz = Expression::Function(Box::new(Function::new(
20387 "WITH_TIMEZONE".to_string(),
20388 vec![ts_cast, tz_arg],
20389 )));
20390 Ok(Expression::AtTimeZone(Box::new(
20391 crate::expressions::AtTimeZone {
20392 this: wtz,
20393 zone: Expression::string("UTC"),
20394 },
20395 )))
20396 }
20397 DialectType::BigQuery => {
20398 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
20399 let cast_dt = Expression::Cast(Box::new(Cast {
20400 this: if let Expression::Cast(c) = ts_cast {
20401 c.this
20402 } else {
20403 ts_cast.clone()
20404 },
20405 to: DataType::Custom {
20406 name: "DATETIME".to_string(),
20407 },
20408 trailing_comments: vec![],
20409 double_colon_syntax: false,
20410 format: None,
20411 default: None,
20412 inferred_type: None,
20413 }));
20414 let ts_func =
20415 Expression::Function(Box::new(Function::new(
20416 "TIMESTAMP".to_string(),
20417 vec![cast_dt, tz_arg],
20418 )));
20419 Ok(Expression::Function(Box::new(Function::new(
20420 "DATETIME".to_string(),
20421 vec![ts_func, Expression::string("UTC")],
20422 ))))
20423 }
20424 _ => {
20425 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
20426 let atz1 = Expression::AtTimeZone(Box::new(
20427 crate::expressions::AtTimeZone {
20428 this: ts_cast,
20429 zone: tz_arg,
20430 },
20431 ));
20432 Ok(Expression::AtTimeZone(Box::new(
20433 crate::expressions::AtTimeZone {
20434 this: atz1,
20435 zone: Expression::string("UTC"),
20436 },
20437 )))
20438 }
20439 }
20440 }
20441 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20442 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
20443 let mut args = f.args;
20444 let ts_arg = args.remove(0);
20445 let tz_arg = args.remove(0);
20446 // Cast string literal to TIMESTAMP
20447 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20448 {
20449 Expression::Cast(Box::new(Cast {
20450 this: ts_arg,
20451 to: DataType::Timestamp {
20452 timezone: false,
20453 precision: None,
20454 },
20455 trailing_comments: vec![],
20456 double_colon_syntax: false,
20457 format: None,
20458 default: None,
20459 inferred_type: None,
20460 }))
20461 } else {
20462 ts_arg
20463 };
20464 match target {
20465 DialectType::Spark | DialectType::Databricks => {
20466 Ok(Expression::Function(Box::new(Function::new(
20467 "FROM_UTC_TIMESTAMP".to_string(),
20468 vec![ts_cast, tz_arg],
20469 ))))
20470 }
20471 DialectType::Presto
20472 | DialectType::Trino
20473 | DialectType::Athena => {
20474 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
20475 Ok(Expression::Function(Box::new(Function::new(
20476 "AT_TIMEZONE".to_string(),
20477 vec![ts_cast, tz_arg],
20478 ))))
20479 }
20480 DialectType::Snowflake => {
20481 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
20482 Ok(Expression::Function(Box::new(Function::new(
20483 "CONVERT_TIMEZONE".to_string(),
20484 vec![Expression::string("UTC"), tz_arg, ts_cast],
20485 ))))
20486 }
20487 _ => {
20488 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
20489 Ok(Expression::AtTimeZone(Box::new(
20490 crate::expressions::AtTimeZone {
20491 this: ts_cast,
20492 zone: tz_arg,
20493 },
20494 )))
20495 }
20496 }
20497 }
20498 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
20499 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
20500 let name = match target {
20501 DialectType::Snowflake => "OBJECT_CONSTRUCT",
20502 _ => "MAP",
20503 };
20504 Ok(Expression::Function(Box::new(Function::new(
20505 name.to_string(),
20506 f.args,
20507 ))))
20508 }
20509 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
20510 "STR_TO_MAP" if f.args.len() >= 1 => match target {
20511 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20512 Ok(Expression::Function(Box::new(Function::new(
20513 "SPLIT_TO_MAP".to_string(),
20514 f.args,
20515 ))))
20516 }
20517 _ => Ok(Expression::Function(f)),
20518 },
20519 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
20520 "TIME_TO_STR" if f.args.len() == 2 => {
20521 let mut args = f.args;
20522 let this = args.remove(0);
20523 let fmt_expr = args.remove(0);
20524 let format = if let Expression::Literal(lit) = fmt_expr {
20525 if let Literal::String(s) = lit.as_ref() {
20526 s.clone()
20527 } else {
20528 String::new()
20529 }
20530 } else {
20531 "%Y-%m-%d %H:%M:%S".to_string()
20532 };
20533 Ok(Expression::TimeToStr(Box::new(
20534 crate::expressions::TimeToStr {
20535 this: Box::new(this),
20536 format,
20537 culture: None,
20538 zone: None,
20539 },
20540 )))
20541 }
20542 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
20543 "STR_TO_TIME" if f.args.len() == 2 => {
20544 let mut args = f.args;
20545 let this = args.remove(0);
20546 let fmt_expr = args.remove(0);
20547 let format = if let Expression::Literal(lit) = fmt_expr {
20548 if let Literal::String(s) = lit.as_ref() {
20549 s.clone()
20550 } else {
20551 String::new()
20552 }
20553 } else {
20554 "%Y-%m-%d %H:%M:%S".to_string()
20555 };
20556 Ok(Expression::StrToTime(Box::new(
20557 crate::expressions::StrToTime {
20558 this: Box::new(this),
20559 format,
20560 zone: None,
20561 safe: None,
20562 target_type: None,
20563 },
20564 )))
20565 }
20566 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
20567 "STR_TO_UNIX" if f.args.len() >= 1 => {
20568 let mut args = f.args;
20569 let this = args.remove(0);
20570 let format = if !args.is_empty() {
20571 if let Expression::Literal(lit) = args.remove(0) {
20572 if let Literal::String(s) = lit.as_ref() {
20573 Some(s.clone())
20574 } else {
20575 None
20576 }
20577 } else {
20578 None
20579 }
20580 } else {
20581 None
20582 };
20583 Ok(Expression::StrToUnix(Box::new(
20584 crate::expressions::StrToUnix {
20585 this: Some(Box::new(this)),
20586 format,
20587 },
20588 )))
20589 }
20590 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
20591 "TIME_TO_UNIX" if f.args.len() == 1 => {
20592 let mut args = f.args;
20593 let this = args.remove(0);
20594 Ok(Expression::TimeToUnix(Box::new(
20595 crate::expressions::UnaryFunc {
20596 this,
20597 original_name: None,
20598 inferred_type: None,
20599 },
20600 )))
20601 }
20602 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
20603 "UNIX_TO_STR" if f.args.len() >= 1 => {
20604 let mut args = f.args;
20605 let this = args.remove(0);
20606 let format = if !args.is_empty() {
20607 if let Expression::Literal(lit) = args.remove(0) {
20608 if let Literal::String(s) = lit.as_ref() {
20609 Some(s.clone())
20610 } else {
20611 None
20612 }
20613 } else {
20614 None
20615 }
20616 } else {
20617 None
20618 };
20619 Ok(Expression::UnixToStr(Box::new(
20620 crate::expressions::UnixToStr {
20621 this: Box::new(this),
20622 format,
20623 },
20624 )))
20625 }
20626 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
20627 "UNIX_TO_TIME" if f.args.len() == 1 => {
20628 let mut args = f.args;
20629 let this = args.remove(0);
20630 Ok(Expression::UnixToTime(Box::new(
20631 crate::expressions::UnixToTime {
20632 this: Box::new(this),
20633 scale: None,
20634 zone: None,
20635 hours: None,
20636 minutes: None,
20637 format: None,
20638 target_type: None,
20639 },
20640 )))
20641 }
20642 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
20643 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
20644 let mut args = f.args;
20645 let this = args.remove(0);
20646 Ok(Expression::TimeStrToDate(Box::new(
20647 crate::expressions::UnaryFunc {
20648 this,
20649 original_name: None,
20650 inferred_type: None,
20651 },
20652 )))
20653 }
20654 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
20655 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
20656 let mut args = f.args;
20657 let this = args.remove(0);
20658 Ok(Expression::TimeStrToTime(Box::new(
20659 crate::expressions::TimeStrToTime {
20660 this: Box::new(this),
20661 zone: None,
20662 },
20663 )))
20664 }
20665 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
20666 "MONTHS_BETWEEN" if f.args.len() == 2 => {
20667 match target {
20668 DialectType::DuckDB => {
20669 let mut args = f.args;
20670 let end_date = args.remove(0);
20671 let start_date = args.remove(0);
20672 let cast_end = Self::ensure_cast_date(end_date);
20673 let cast_start = Self::ensure_cast_date(start_date);
20674 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
20675 let dd = Expression::Function(Box::new(Function::new(
20676 "DATE_DIFF".to_string(),
20677 vec![
20678 Expression::string("MONTH"),
20679 cast_start.clone(),
20680 cast_end.clone(),
20681 ],
20682 )));
20683 let day_end =
20684 Expression::Function(Box::new(Function::new(
20685 "DAY".to_string(),
20686 vec![cast_end.clone()],
20687 )));
20688 let day_start =
20689 Expression::Function(Box::new(Function::new(
20690 "DAY".to_string(),
20691 vec![cast_start.clone()],
20692 )));
20693 let last_day_end =
20694 Expression::Function(Box::new(Function::new(
20695 "LAST_DAY".to_string(),
20696 vec![cast_end.clone()],
20697 )));
20698 let last_day_start =
20699 Expression::Function(Box::new(Function::new(
20700 "LAST_DAY".to_string(),
20701 vec![cast_start.clone()],
20702 )));
20703 let day_last_end = Expression::Function(Box::new(
20704 Function::new("DAY".to_string(), vec![last_day_end]),
20705 ));
20706 let day_last_start = Expression::Function(Box::new(
20707 Function::new("DAY".to_string(), vec![last_day_start]),
20708 ));
20709 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
20710 day_end.clone(),
20711 day_last_end,
20712 )));
20713 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
20714 day_start.clone(),
20715 day_last_start,
20716 )));
20717 let both_cond =
20718 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
20719 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
20720 day_end, day_start,
20721 )));
20722 let day_diff_paren = Expression::Paren(Box::new(
20723 crate::expressions::Paren {
20724 this: day_diff,
20725 trailing_comments: Vec::new(),
20726 },
20727 ));
20728 let frac = Expression::Div(Box::new(BinaryOp::new(
20729 day_diff_paren,
20730 Expression::Literal(Box::new(Literal::Number(
20731 "31.0".to_string(),
20732 ))),
20733 )));
20734 let case_expr = Expression::Case(Box::new(Case {
20735 operand: None,
20736 whens: vec![(both_cond, Expression::number(0))],
20737 else_: Some(frac),
20738 comments: Vec::new(),
20739 inferred_type: None,
20740 }));
20741 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
20742 }
20743 DialectType::Snowflake | DialectType::Redshift => {
20744 let mut args = f.args;
20745 let end_date = args.remove(0);
20746 let start_date = args.remove(0);
20747 let unit = Expression::Identifier(Identifier::new("MONTH"));
20748 Ok(Expression::Function(Box::new(Function::new(
20749 "DATEDIFF".to_string(),
20750 vec![unit, start_date, end_date],
20751 ))))
20752 }
20753 DialectType::Presto
20754 | DialectType::Trino
20755 | DialectType::Athena => {
20756 let mut args = f.args;
20757 let end_date = args.remove(0);
20758 let start_date = args.remove(0);
20759 Ok(Expression::Function(Box::new(Function::new(
20760 "DATE_DIFF".to_string(),
20761 vec![Expression::string("MONTH"), start_date, end_date],
20762 ))))
20763 }
20764 _ => Ok(Expression::Function(f)),
20765 }
20766 }
20767 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
20768 // Drop the roundOff arg for non-Spark targets, keep it for Spark
20769 "MONTHS_BETWEEN" if f.args.len() == 3 => {
20770 match target {
20771 DialectType::Spark | DialectType::Databricks => {
20772 Ok(Expression::Function(f))
20773 }
20774 _ => {
20775 // Drop the 3rd arg and delegate to the 2-arg logic
20776 let mut args = f.args;
20777 let end_date = args.remove(0);
20778 let start_date = args.remove(0);
20779 // Re-create as 2-arg and process
20780 let f2 = Function::new(
20781 "MONTHS_BETWEEN".to_string(),
20782 vec![end_date, start_date],
20783 );
20784 let e2 = Expression::Function(Box::new(f2));
20785 Self::cross_dialect_normalize(e2, source, target)
20786 }
20787 }
20788 }
20789 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
20790 "TO_TIMESTAMP"
20791 if f.args.len() == 1
20792 && matches!(
20793 source,
20794 DialectType::Spark
20795 | DialectType::Databricks
20796 | DialectType::Hive
20797 ) =>
20798 {
20799 let arg = f.args.into_iter().next().unwrap();
20800 Ok(Expression::Cast(Box::new(Cast {
20801 this: arg,
20802 to: DataType::Timestamp {
20803 timezone: false,
20804 precision: None,
20805 },
20806 trailing_comments: vec![],
20807 double_colon_syntax: false,
20808 format: None,
20809 default: None,
20810 inferred_type: None,
20811 })))
20812 }
20813 // STRING(x) from Spark/Databricks -> CAST(x AS STRING) for Spark/Databricks/Hive targets, CAST to the generic text type otherwise
20814 "STRING"
20815 if f.args.len() == 1
20816 && matches!(
20817 source,
20818 DialectType::Spark | DialectType::Databricks
20819 ) =>
20820 {
20821 let arg = f.args.into_iter().next().unwrap();
20822 let dt = match target {
20823 DialectType::Spark
20824 | DialectType::Databricks
20825 | DialectType::Hive => DataType::Custom {
20826 name: "STRING".to_string(),
20827 },
20828 _ => DataType::Text,
20829 };
20830 Ok(Expression::Cast(Box::new(Cast {
20831 this: arg,
20832 to: dt,
20833 trailing_comments: vec![],
20834 double_colon_syntax: false,
20835 format: None,
20836 default: None,
20837 inferred_type: None,
20838 })))
20839 }
20840 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
20841 "LOGICAL_OR" if f.args.len() == 1 => {
20842 let name = match target {
20843 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
20844 _ => "LOGICAL_OR",
20845 };
20846 Ok(Expression::Function(Box::new(Function::new(
20847 name.to_string(),
20848 f.args,
20849 ))))
20850 }
20851 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
20852 "SPLIT"
20853 if f.args.len() == 2
20854 && matches!(
20855 source,
20856 DialectType::Spark
20857 | DialectType::Databricks
20858 | DialectType::Hive
20859 ) =>
20860 {
20861 let name = match target {
20862 DialectType::DuckDB => "STR_SPLIT_REGEX",
20863 DialectType::Presto
20864 | DialectType::Trino
20865 | DialectType::Athena => "REGEXP_SPLIT",
20866 DialectType::Spark
20867 | DialectType::Databricks
20868 | DialectType::Hive => "SPLIT",
20869 _ => "SPLIT",
20870 };
20871 Ok(Expression::Function(Box::new(Function::new(
20872 name.to_string(),
20873 f.args,
20874 ))))
20875 }
20876 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
20877 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
20878 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20879 Ok(Expression::Function(Box::new(Function::new(
20880 "ELEMENT_AT".to_string(),
20881 f.args,
20882 ))))
20883 }
20884 DialectType::DuckDB => {
20885 let mut args = f.args;
20886 let arr = args.remove(0);
20887 let idx = args.remove(0);
20888 Ok(Expression::Subscript(Box::new(
20889 crate::expressions::Subscript {
20890 this: arr,
20891 index: idx,
20892 },
20893 )))
20894 }
20895 _ => Ok(Expression::Function(f)),
20896 },
20897 // ARRAY_FILTER(arr, lambda) -> LIST_FILTER for DuckDB, kept as ARRAY_FILTER for StarRocks, FILTER for all other targets
20898 "ARRAY_FILTER" if f.args.len() == 2 => {
20899 let name = match target {
20900 DialectType::DuckDB => "LIST_FILTER",
20901 DialectType::StarRocks => "ARRAY_FILTER",
20902 _ => "FILTER",
20903 };
20904 Ok(Expression::Function(Box::new(Function::new(
20905 name.to_string(),
20906 f.args,
20907 ))))
20908 }
20909 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
20910 "FILTER" if f.args.len() == 2 => {
20911 let name = match target {
20912 DialectType::DuckDB => "LIST_FILTER",
20913 DialectType::StarRocks => "ARRAY_FILTER",
20914 _ => "FILTER",
20915 };
20916 Ok(Expression::Function(Box::new(Function::new(
20917 name.to_string(),
20918 f.args,
20919 ))))
20920 }
20921 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
20922 "REDUCE" if f.args.len() >= 3 => {
20923 let name = match target {
20924 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
20925 _ => "REDUCE",
20926 };
20927 Ok(Expression::Function(Box::new(Function::new(
20928 name.to_string(),
20929 f.args,
20930 ))))
20931 }
20932 // CURRENT_SCHEMA() -> dialect-specific
20933 "CURRENT_SCHEMA" => {
20934 match target {
20935 DialectType::PostgreSQL => {
20936 // PostgreSQL: CURRENT_SCHEMA (no parens)
20937 Ok(Expression::Function(Box::new(Function {
20938 name: "CURRENT_SCHEMA".to_string(),
20939 args: vec![],
20940 distinct: false,
20941 trailing_comments: vec![],
20942 use_bracket_syntax: false,
20943 no_parens: true,
20944 quoted: false,
20945 span: None,
20946 inferred_type: None,
20947 })))
20948 }
20949 DialectType::MySQL
20950 | DialectType::Doris
20951 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
20952 Function::new("SCHEMA".to_string(), vec![]),
20953 ))),
20954 DialectType::TSQL => Ok(Expression::Function(Box::new(
20955 Function::new("SCHEMA_NAME".to_string(), vec![]),
20956 ))),
20957 DialectType::SQLite => Ok(Expression::Literal(Box::new(
20958 Literal::String("main".to_string()),
20959 ))),
20960 _ => Ok(Expression::Function(f)),
20961 }
20962 }
20963 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20964 "LTRIM" if f.args.len() == 2 => match target {
20965 DialectType::Spark
20966 | DialectType::Hive
20967 | DialectType::Databricks
20968 | DialectType::ClickHouse => {
20969 let mut args = f.args;
20970 let str_expr = args.remove(0);
20971 let chars = args.remove(0);
20972 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20973 this: str_expr,
20974 characters: Some(chars),
20975 position: crate::expressions::TrimPosition::Leading,
20976 sql_standard_syntax: true,
20977 position_explicit: true,
20978 })))
20979 }
20980 _ => Ok(Expression::Function(f)),
20981 },
20982 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20983 "RTRIM" if f.args.len() == 2 => match target {
20984 DialectType::Spark
20985 | DialectType::Hive
20986 | DialectType::Databricks
20987 | DialectType::ClickHouse => {
20988 let mut args = f.args;
20989 let str_expr = args.remove(0);
20990 let chars = args.remove(0);
20991 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20992 this: str_expr,
20993 characters: Some(chars),
20994 position: crate::expressions::TrimPosition::Trailing,
20995 sql_standard_syntax: true,
20996 position_explicit: true,
20997 })))
20998 }
20999 _ => Ok(Expression::Function(f)),
21000 },
21001 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
21002 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
21003 DialectType::ClickHouse => {
21004 let mut new_f = *f;
21005 new_f.name = "arrayReverse".to_string();
21006 Ok(Expression::Function(Box::new(new_f)))
21007 }
21008 _ => Ok(Expression::Function(f)),
21009 },
21010 // UUID() -> NEWID() for TSQL/Fabric
21011 "UUID" if f.args.is_empty() => match target {
21012 DialectType::TSQL | DialectType::Fabric => {
21013 Ok(Expression::Function(Box::new(Function::new(
21014 "NEWID".to_string(),
21015 vec![],
21016 ))))
21017 }
21018 _ => Ok(Expression::Function(f)),
21019 },
21020 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
21021 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
21022 DialectType::ClickHouse => {
21023 let mut new_f = *f;
21024 new_f.name = "farmFingerprint64".to_string();
21025 Ok(Expression::Function(Box::new(new_f)))
21026 }
21027 DialectType::Redshift => {
21028 let mut new_f = *f;
21029 new_f.name = "FARMFINGERPRINT64".to_string();
21030 Ok(Expression::Function(Box::new(new_f)))
21031 }
21032 _ => Ok(Expression::Function(f)),
21033 },
21034 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
21035 "JSON_KEYS" => match target {
21036 DialectType::Databricks | DialectType::Spark => {
21037 let mut new_f = *f;
21038 new_f.name = "JSON_OBJECT_KEYS".to_string();
21039 Ok(Expression::Function(Box::new(new_f)))
21040 }
21041 DialectType::Snowflake => {
21042 let mut new_f = *f;
21043 new_f.name = "OBJECT_KEYS".to_string();
21044 Ok(Expression::Function(Box::new(new_f)))
21045 }
21046 _ => Ok(Expression::Function(f)),
21047 },
21048 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
21049 "WEEKOFYEAR" => match target {
21050 DialectType::Snowflake => {
21051 let mut new_f = *f;
21052 new_f.name = "WEEKISO".to_string();
21053 Ok(Expression::Function(Box::new(new_f)))
21054 }
21055 _ => Ok(Expression::Function(f)),
21056 },
21057 // FORMAT(fmt, args...) from Generic -> FORMAT_STRING(fmt, args...) for Databricks/Spark
21058 "FORMAT"
21059 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
21060 {
21061 match target {
21062 DialectType::Databricks | DialectType::Spark => {
21063 let mut new_f = *f;
21064 new_f.name = "FORMAT_STRING".to_string();
21065 Ok(Expression::Function(Box::new(new_f)))
21066 }
21067 _ => Ok(Expression::Function(f)),
21068 }
21069 }
21070 // CONCAT_WS from Generic is null-propagating in SQLGlot fixtures.
21071 // Trino also requires non-separator arguments cast to VARCHAR.
21072 "CONCAT_WS" if f.args.len() >= 2 => {
21073 fn concat_ws_null_case(
21074 args: Vec<Expression>,
21075 else_expr: Expression,
21076 ) -> Expression {
21077 let mut null_checks = args.iter().cloned().map(|arg| {
21078 Expression::IsNull(Box::new(crate::expressions::IsNull {
21079 this: arg,
21080 not: false,
21081 postfix_form: false,
21082 }))
21083 });
21084 let first_null_check = null_checks
21085 .next()
21086 .expect("CONCAT_WS with >= 2 args must yield a null check");
21087 let null_check =
21088 null_checks.fold(first_null_check, |left, right| {
21089 Expression::Or(Box::new(BinaryOp {
21090 left,
21091 right,
21092 left_comments: Vec::new(),
21093 operator_comments: Vec::new(),
21094 trailing_comments: Vec::new(),
21095 inferred_type: None,
21096 }))
21097 });
21098 Expression::Case(Box::new(Case {
21099 operand: None,
21100 whens: vec![(null_check, Expression::Null(Null))],
21101 else_: Some(else_expr),
21102 comments: vec![],
21103 inferred_type: None,
21104 }))
21105 }
21106
21107 match target {
21108 DialectType::Trino
21109 if matches!(source, DialectType::Generic) =>
21110 {
21111 let original_args = f.args.clone();
21112 let mut args = f.args;
21113 let sep = args.remove(0);
21114 let cast_args: Vec<Expression> = args
21115 .into_iter()
21116 .map(|a| {
21117 Expression::Cast(Box::new(Cast {
21118 this: a,
21119 to: DataType::VarChar {
21120 length: None,
21121 parenthesized_length: false,
21122 },
21123 double_colon_syntax: false,
21124 trailing_comments: Vec::new(),
21125 format: None,
21126 default: None,
21127 inferred_type: None,
21128 }))
21129 })
21130 .collect();
21131 let mut new_args = vec![sep];
21132 new_args.extend(cast_args);
21133 let else_expr = Expression::Function(Box::new(
21134 Function::new("CONCAT_WS".to_string(), new_args),
21135 ));
21136 Ok(concat_ws_null_case(original_args, else_expr))
21137 }
21138 DialectType::Presto
21139 | DialectType::Trino
21140 | DialectType::Athena => {
21141 let mut args = f.args;
21142 let sep = args.remove(0);
21143 let cast_args: Vec<Expression> = args
21144 .into_iter()
21145 .map(|a| {
21146 Expression::Cast(Box::new(Cast {
21147 this: a,
21148 to: DataType::VarChar {
21149 length: None,
21150 parenthesized_length: false,
21151 },
21152 double_colon_syntax: false,
21153 trailing_comments: Vec::new(),
21154 format: None,
21155 default: None,
21156 inferred_type: None,
21157 }))
21158 })
21159 .collect();
21160 let mut new_args = vec![sep];
21161 new_args.extend(cast_args);
21162 Ok(Expression::Function(Box::new(Function::new(
21163 "CONCAT_WS".to_string(),
21164 new_args,
21165 ))))
21166 }
21167 DialectType::Spark
21168 | DialectType::Hive
21169 | DialectType::DuckDB
21170 if matches!(source, DialectType::Generic) =>
21171 {
21172 let args = f.args;
21173 let else_expr = Expression::Function(Box::new(
21174 Function::new("CONCAT_WS".to_string(), args.clone()),
21175 ));
21176 Ok(concat_ws_null_case(args, else_expr))
21177 }
21178 _ => Ok(Expression::Function(f)),
21179 }
21180 }
21181 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
21182 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
21183 DialectType::DuckDB
21184 if f.args.len() == 3
21185 && matches!(source, DialectType::Snowflake) =>
21186 {
21187 // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
21188 // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
21189 let mut args = f.args;
21190 let arr = args.remove(0);
21191 let start = args.remove(0);
21192 let end = args.remove(0);
21193
21194 // CASE WHEN start >= 0 THEN start + 1 ELSE start END
21195 let adjusted_start = Expression::Case(Box::new(Case {
21196 operand: None,
21197 whens: vec![(
21198 Expression::Gte(Box::new(BinaryOp {
21199 left: start.clone(),
21200 right: Expression::number(0),
21201 left_comments: vec![],
21202 operator_comments: vec![],
21203 trailing_comments: vec![],
21204 inferred_type: None,
21205 })),
21206 Expression::Add(Box::new(BinaryOp {
21207 left: start.clone(),
21208 right: Expression::number(1),
21209 left_comments: vec![],
21210 operator_comments: vec![],
21211 trailing_comments: vec![],
21212 inferred_type: None,
21213 })),
21214 )],
21215 else_: Some(start),
21216 comments: vec![],
21217 inferred_type: None,
21218 }));
21219
21220 // CASE WHEN end < 0 THEN end - 1 ELSE end END
21221 let adjusted_end = Expression::Case(Box::new(Case {
21222 operand: None,
21223 whens: vec![(
21224 Expression::Lt(Box::new(BinaryOp {
21225 left: end.clone(),
21226 right: Expression::number(0),
21227 left_comments: vec![],
21228 operator_comments: vec![],
21229 trailing_comments: vec![],
21230 inferred_type: None,
21231 })),
21232 Expression::Sub(Box::new(BinaryOp {
21233 left: end.clone(),
21234 right: Expression::number(1),
21235 left_comments: vec![],
21236 operator_comments: vec![],
21237 trailing_comments: vec![],
21238 inferred_type: None,
21239 })),
21240 )],
21241 else_: Some(end),
21242 comments: vec![],
21243 inferred_type: None,
21244 }));
21245
21246 Ok(Expression::Function(Box::new(Function::new(
21247 "ARRAY_SLICE".to_string(),
21248 vec![arr, adjusted_start, adjusted_end],
21249 ))))
21250 }
21251 DialectType::Presto
21252 | DialectType::Trino
21253 | DialectType::Athena
21254 | DialectType::Databricks
21255 | DialectType::Spark => {
21256 let mut new_f = *f;
21257 new_f.name = "SLICE".to_string();
21258 Ok(Expression::Function(Box::new(new_f)))
21259 }
21260 DialectType::ClickHouse => {
21261 let mut new_f = *f;
21262 new_f.name = "arraySlice".to_string();
21263 Ok(Expression::Function(Box::new(new_f)))
21264 }
21265 _ => Ok(Expression::Function(f)),
21266 },
21267 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
21268 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
21269 DialectType::DuckDB => {
21270 let mut args = f.args;
21271 let arr = args.remove(0);
21272 let val = args.remove(0);
21273 Ok(Expression::Function(Box::new(Function::new(
21274 "LIST_PREPEND".to_string(),
21275 vec![val, arr],
21276 ))))
21277 }
21278 _ => Ok(Expression::Function(f)),
21279 },
21280 // ARRAY_REMOVE(arr, target) -> dialect-specific
21281 "ARRAY_REMOVE" if f.args.len() == 2 => {
21282 match target {
21283 DialectType::DuckDB => {
21284 let mut args = f.args;
21285 let arr = args.remove(0);
21286 let target_val = args.remove(0);
21287 let u_id = crate::expressions::Identifier::new("_u");
21288 // LIST_FILTER(arr, _u -> _u <> target)
21289 let lambda = Expression::Lambda(Box::new(
21290 crate::expressions::LambdaExpr {
21291 parameters: vec![u_id.clone()],
21292 body: Expression::Neq(Box::new(BinaryOp {
21293 left: Expression::Identifier(u_id),
21294 right: target_val,
21295 left_comments: Vec::new(),
21296 operator_comments: Vec::new(),
21297 trailing_comments: Vec::new(),
21298 inferred_type: None,
21299 })),
21300 colon: false,
21301 parameter_types: Vec::new(),
21302 },
21303 ));
21304 Ok(Expression::Function(Box::new(Function::new(
21305 "LIST_FILTER".to_string(),
21306 vec![arr, lambda],
21307 ))))
21308 }
21309 DialectType::ClickHouse => {
21310 let mut args = f.args;
21311 let arr = args.remove(0);
21312 let target_val = args.remove(0);
21313 let u_id = crate::expressions::Identifier::new("_u");
21314 // arrayFilter(_u -> _u <> target, arr)
21315 let lambda = Expression::Lambda(Box::new(
21316 crate::expressions::LambdaExpr {
21317 parameters: vec![u_id.clone()],
21318 body: Expression::Neq(Box::new(BinaryOp {
21319 left: Expression::Identifier(u_id),
21320 right: target_val,
21321 left_comments: Vec::new(),
21322 operator_comments: Vec::new(),
21323 trailing_comments: Vec::new(),
21324 inferred_type: None,
21325 })),
21326 colon: false,
21327 parameter_types: Vec::new(),
21328 },
21329 ));
21330 Ok(Expression::Function(Box::new(Function::new(
21331 "arrayFilter".to_string(),
21332 vec![lambda, arr],
21333 ))))
21334 }
21335 DialectType::BigQuery => {
21336 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
21337 let mut args = f.args;
21338 let arr = args.remove(0);
21339 let target_val = args.remove(0);
21340 let u_id = crate::expressions::Identifier::new("_u");
21341 let u_col = Expression::Column(Box::new(
21342 crate::expressions::Column {
21343 name: u_id.clone(),
21344 table: None,
21345 join_mark: false,
21346 trailing_comments: Vec::new(),
21347 span: None,
21348 inferred_type: None,
21349 },
21350 ));
21351 // UNNEST(the_array) AS _u
21352 let unnest_expr = Expression::Unnest(Box::new(
21353 crate::expressions::UnnestFunc {
21354 this: arr,
21355 expressions: Vec::new(),
21356 with_ordinality: false,
21357 alias: None,
21358 offset_alias: None,
21359 },
21360 ));
21361 let aliased_unnest = Expression::Alias(Box::new(
21362 crate::expressions::Alias {
21363 this: unnest_expr,
21364 alias: u_id.clone(),
21365 column_aliases: Vec::new(),
21366 alias_explicit_as: false,
21367 alias_keyword: None,
21368 pre_alias_comments: Vec::new(),
21369 trailing_comments: Vec::new(),
21370 inferred_type: None,
21371 },
21372 ));
21373 // _u <> target
21374 let where_cond = Expression::Neq(Box::new(BinaryOp {
21375 left: u_col.clone(),
21376 right: target_val,
21377 left_comments: Vec::new(),
21378 operator_comments: Vec::new(),
21379 trailing_comments: Vec::new(),
21380 inferred_type: None,
21381 }));
21382 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
21383 let subquery = Expression::Select(Box::new(
21384 crate::expressions::Select::new()
21385 .column(u_col)
21386 .from(aliased_unnest)
21387 .where_(where_cond),
21388 ));
21389 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
21390 Ok(Expression::ArrayFunc(Box::new(
21391 crate::expressions::ArrayConstructor {
21392 expressions: vec![subquery],
21393 bracket_notation: false,
21394 use_list_keyword: false,
21395 },
21396 )))
21397 }
21398 _ => Ok(Expression::Function(f)),
21399 }
21400 }
21401 // PARSE_JSON(str) -> remove for SQLite/Doris/MySQL/StarRocks (just use the inner argument)
21402 "PARSE_JSON" if f.args.len() == 1 => {
21403 match target {
21404 DialectType::SQLite
21405 | DialectType::Doris
21406 | DialectType::MySQL
21407 | DialectType::StarRocks => {
21408 // Strip PARSE_JSON, return the inner argument
21409 Ok(f.args.into_iter().next().unwrap())
21410 }
21411 _ => Ok(Expression::Function(f)),
21412 }
21413 }
21414 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
21415 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
21416 "JSON_REMOVE" => Ok(Expression::Function(f)),
21417 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
21418 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
21419 "JSON_SET" => Ok(Expression::Function(f)),
21420 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
21421 // Behavior per search value type:
21422 // NULL literal -> CASE WHEN x IS NULL THEN result
21423 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
21424 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
21425 "DECODE" if f.args.len() >= 3 => {
21426 // Keep as DECODE for targets that support it natively
21427 let keep_as_decode = matches!(
21428 target,
21429 DialectType::Oracle
21430 | DialectType::Snowflake
21431 | DialectType::Redshift
21432 | DialectType::Teradata
21433 | DialectType::Spark
21434 | DialectType::Databricks
21435 );
21436 if keep_as_decode {
21437 return Ok(Expression::Function(f));
21438 }
21439
21440 let mut args = f.args;
21441 let this_expr = args.remove(0);
21442 let mut pairs = Vec::new();
21443 let mut default = None;
21444 let mut i = 0;
21445 while i + 1 < args.len() {
21446 pairs.push((args[i].clone(), args[i + 1].clone()));
21447 i += 2;
21448 }
21449 if i < args.len() {
21450 default = Some(args[i].clone());
21451 }
21452 // Helper: check if expression is a literal value
21453 fn is_literal(e: &Expression) -> bool {
21454 matches!(
21455 e,
21456 Expression::Literal(_)
21457 | Expression::Boolean(_)
21458 | Expression::Neg(_)
21459 )
21460 }
21461 let whens: Vec<(Expression, Expression)> = pairs
21462 .into_iter()
21463 .map(|(search, result)| {
21464 if matches!(&search, Expression::Null(_)) {
21465 // NULL search -> IS NULL
21466 let condition = Expression::Is(Box::new(BinaryOp {
21467 left: this_expr.clone(),
21468 right: Expression::Null(crate::expressions::Null),
21469 left_comments: Vec::new(),
21470 operator_comments: Vec::new(),
21471 trailing_comments: Vec::new(),
21472 inferred_type: None,
21473 }));
21474 (condition, result)
21475 } else if is_literal(&search) {
21476 // Literal search -> simple equality
21477 let eq = Expression::Eq(Box::new(BinaryOp {
21478 left: this_expr.clone(),
21479 right: search,
21480 left_comments: Vec::new(),
21481 operator_comments: Vec::new(),
21482 trailing_comments: Vec::new(),
21483 inferred_type: None,
21484 }));
21485 (eq, result)
21486 } else {
21487 // Non-literal (column ref, expression) -> null-safe comparison
21488 let needs_paren = matches!(
21489 &search,
21490 Expression::Eq(_)
21491 | Expression::Neq(_)
21492 | Expression::Gt(_)
21493 | Expression::Gte(_)
21494 | Expression::Lt(_)
21495 | Expression::Lte(_)
21496 );
21497 let search_for_eq = if needs_paren {
21498 Expression::Paren(Box::new(
21499 crate::expressions::Paren {
21500 this: search.clone(),
21501 trailing_comments: Vec::new(),
21502 },
21503 ))
21504 } else {
21505 search.clone()
21506 };
21507 let eq = Expression::Eq(Box::new(BinaryOp {
21508 left: this_expr.clone(),
21509 right: search_for_eq,
21510 left_comments: Vec::new(),
21511 operator_comments: Vec::new(),
21512 trailing_comments: Vec::new(),
21513 inferred_type: None,
21514 }));
21515 let search_for_null = if needs_paren {
21516 Expression::Paren(Box::new(
21517 crate::expressions::Paren {
21518 this: search.clone(),
21519 trailing_comments: Vec::new(),
21520 },
21521 ))
21522 } else {
21523 search.clone()
21524 };
21525 let x_is_null = Expression::Is(Box::new(BinaryOp {
21526 left: this_expr.clone(),
21527 right: Expression::Null(crate::expressions::Null),
21528 left_comments: Vec::new(),
21529 operator_comments: Vec::new(),
21530 trailing_comments: Vec::new(),
21531 inferred_type: None,
21532 }));
21533 let s_is_null = Expression::Is(Box::new(BinaryOp {
21534 left: search_for_null,
21535 right: Expression::Null(crate::expressions::Null),
21536 left_comments: Vec::new(),
21537 operator_comments: Vec::new(),
21538 trailing_comments: Vec::new(),
21539 inferred_type: None,
21540 }));
21541 let both_null = Expression::And(Box::new(BinaryOp {
21542 left: x_is_null,
21543 right: s_is_null,
21544 left_comments: Vec::new(),
21545 operator_comments: Vec::new(),
21546 trailing_comments: Vec::new(),
21547 inferred_type: None,
21548 }));
21549 let condition = Expression::Or(Box::new(BinaryOp {
21550 left: eq,
21551 right: Expression::Paren(Box::new(
21552 crate::expressions::Paren {
21553 this: both_null,
21554 trailing_comments: Vec::new(),
21555 },
21556 )),
21557 left_comments: Vec::new(),
21558 operator_comments: Vec::new(),
21559 trailing_comments: Vec::new(),
21560 inferred_type: None,
21561 }));
21562 (condition, result)
21563 }
21564 })
21565 .collect();
21566 Ok(Expression::Case(Box::new(Case {
21567 operand: None,
21568 whens,
21569 else_: default,
21570 comments: Vec::new(),
21571 inferred_type: None,
21572 })))
21573 }
21574 // LEVENSHTEIN(a, b, ...) -> dialect-specific
21575 "LEVENSHTEIN" => {
21576 match target {
21577 DialectType::BigQuery => {
21578 let mut new_f = *f;
21579 new_f.name = "EDIT_DISTANCE".to_string();
21580 Ok(Expression::Function(Box::new(new_f)))
21581 }
21582 DialectType::Drill => {
21583 let mut new_f = *f;
21584 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
21585 Ok(Expression::Function(Box::new(new_f)))
21586 }
21587 DialectType::PostgreSQL if f.args.len() == 6 => {
21588 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
21589 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
21590 let mut new_f = *f;
21591 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
21592 Ok(Expression::Function(Box::new(new_f)))
21593 }
21594 _ => Ok(Expression::Function(f)),
21595 }
21596 }
21597 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
21598 "ARRAY_MAX" => {
21599 let name = match target {
21600 DialectType::ClickHouse => "arrayMax",
21601 DialectType::DuckDB => "LIST_MAX",
21602 _ => "ARRAY_MAX",
21603 };
21604 let mut new_f = *f;
21605 new_f.name = name.to_string();
21606 Ok(Expression::Function(Box::new(new_f)))
21607 }
21608 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
21609 "ARRAY_MIN" => {
21610 let name = match target {
21611 DialectType::ClickHouse => "arrayMin",
21612 DialectType::DuckDB => "LIST_MIN",
21613 _ => "ARRAY_MIN",
21614 };
21615 let mut new_f = *f;
21616 new_f.name = name.to_string();
21617 Ok(Expression::Function(Box::new(new_f)))
21618 }
21619 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
21620 // -> CAST(JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) * 100 AS INT) for DuckDB
21621 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
21622 let mut args = f.args;
21623 let b = args.pop().unwrap();
21624 let a = args.pop().unwrap();
21625 match target {
21626 DialectType::ClickHouse => {
21627 let upper_a = Expression::Upper(Box::new(
21628 crate::expressions::UnaryFunc::new(a),
21629 ));
21630 let upper_b = Expression::Upper(Box::new(
21631 crate::expressions::UnaryFunc::new(b),
21632 ));
21633 Ok(Expression::Function(Box::new(Function::new(
21634 "jaroWinklerSimilarity".to_string(),
21635 vec![upper_a, upper_b],
21636 ))))
21637 }
21638 DialectType::DuckDB => {
21639 let upper_a = Expression::Upper(Box::new(
21640 crate::expressions::UnaryFunc::new(a),
21641 ));
21642 let upper_b = Expression::Upper(Box::new(
21643 crate::expressions::UnaryFunc::new(b),
21644 ));
21645 let score = Expression::Function(Box::new(Function::new(
21646 "JARO_WINKLER_SIMILARITY".to_string(),
21647 vec![upper_a, upper_b],
21648 )));
21649 let scaled = Expression::Mul(Box::new(BinaryOp {
21650 left: score,
21651 right: Expression::number(100),
21652 left_comments: Vec::new(),
21653 operator_comments: Vec::new(),
21654 trailing_comments: Vec::new(),
21655 inferred_type: None,
21656 }));
21657 Ok(Expression::Cast(Box::new(Cast {
21658 this: scaled,
21659 to: DataType::Int {
21660 length: None,
21661 integer_spelling: false,
21662 },
21663 trailing_comments: Vec::new(),
21664 double_colon_syntax: false,
21665 format: None,
21666 default: None,
21667 inferred_type: None,
21668 })))
21669 }
21670 _ => Ok(Expression::Function(Box::new(Function::new(
21671 "JAROWINKLER_SIMILARITY".to_string(),
21672 vec![a, b],
21673 )))),
21674 }
21675 }
21676 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
21677 "CURRENT_SCHEMAS" => match target {
21678 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21679 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
21680 ))),
21681 _ => Ok(Expression::Function(f)),
21682 },
21683 // TRUNC/TRUNCATE (numeric) -> dialect-specific
21684 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
21685 match target {
21686 DialectType::TSQL | DialectType::Fabric => {
21687 // ROUND(x, decimals, 1) - the 1 flag means truncation
21688 let mut args = f.args;
21689 let this = if args.is_empty() {
21690 return Ok(Expression::Function(Box::new(
21691 Function::new("TRUNC".to_string(), args),
21692 )));
21693 } else {
21694 args.remove(0)
21695 };
21696 let decimals = if args.is_empty() {
21697 Expression::Literal(Box::new(Literal::Number(
21698 "0".to_string(),
21699 )))
21700 } else {
21701 args.remove(0)
21702 };
21703 Ok(Expression::Function(Box::new(Function::new(
21704 "ROUND".to_string(),
21705 vec![
21706 this,
21707 decimals,
21708 Expression::Literal(Box::new(Literal::Number(
21709 "1".to_string(),
21710 ))),
21711 ],
21712 ))))
21713 }
21714 DialectType::Presto
21715 | DialectType::Trino
21716 | DialectType::Athena => {
21717 // TRUNCATE(x, decimals)
21718 let mut new_f = *f;
21719 new_f.name = "TRUNCATE".to_string();
21720 Ok(Expression::Function(Box::new(new_f)))
21721 }
21722 DialectType::MySQL
21723 | DialectType::SingleStore
21724 | DialectType::TiDB => {
21725 // TRUNCATE(x, decimals)
21726 let mut new_f = *f;
21727 new_f.name = "TRUNCATE".to_string();
21728 Ok(Expression::Function(Box::new(new_f)))
21729 }
21730 DialectType::DuckDB => {
21731 // DuckDB supports TRUNC(x, decimals) — preserve both args
21732 let mut args = f.args;
21733 // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
21734 if args.len() == 2
21735 && matches!(source, DialectType::Snowflake)
21736 {
21737 let decimals = args.remove(1);
21738 let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
21739 || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
21740 let wrapped = if !is_int {
21741 Expression::Cast(Box::new(
21742 crate::expressions::Cast {
21743 this: decimals,
21744 to: DataType::Int {
21745 length: None,
21746 integer_spelling: false,
21747 },
21748 double_colon_syntax: false,
21749 trailing_comments: Vec::new(),
21750 format: None,
21751 default: None,
21752 inferred_type: None,
21753 },
21754 ))
21755 } else {
21756 decimals
21757 };
21758 args.push(wrapped);
21759 }
21760 Ok(Expression::Function(Box::new(Function::new(
21761 "TRUNC".to_string(),
21762 args,
21763 ))))
21764 }
21765 DialectType::ClickHouse => {
21766 // trunc(x, decimals) - lowercase
21767 let mut new_f = *f;
21768 new_f.name = "trunc".to_string();
21769 Ok(Expression::Function(Box::new(new_f)))
21770 }
21771 DialectType::Spark | DialectType::Databricks => {
21772 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
21773 let this = f.args.into_iter().next().unwrap_or(
21774 Expression::Literal(Box::new(Literal::Number(
21775 "0".to_string(),
21776 ))),
21777 );
21778 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
21779 this,
21780 to: crate::expressions::DataType::BigInt {
21781 length: None,
21782 },
21783 double_colon_syntax: false,
21784 trailing_comments: Vec::new(),
21785 format: None,
21786 default: None,
21787 inferred_type: None,
21788 })))
21789 }
21790 _ => {
21791 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
21792 let mut new_f = *f;
21793 new_f.name = "TRUNC".to_string();
21794 Ok(Expression::Function(Box::new(new_f)))
21795 }
21796 }
21797 }
21798 // CURRENT_VERSION() -> VERSION() for most dialects
21799 "CURRENT_VERSION" => match target {
21800 DialectType::Snowflake
21801 | DialectType::Databricks
21802 | DialectType::StarRocks => Ok(Expression::Function(f)),
21803 DialectType::SQLite => {
21804 let mut new_f = *f;
21805 new_f.name = "SQLITE_VERSION".to_string();
21806 Ok(Expression::Function(Box::new(new_f)))
21807 }
21808 _ => {
21809 let mut new_f = *f;
21810 new_f.name = "VERSION".to_string();
21811 Ok(Expression::Function(Box::new(new_f)))
21812 }
21813 },
21814 // ARRAY_REVERSE with any other arg count -> arrayReverse for ClickHouse (1-arg calls are already handled by the guarded ARRAY_REVERSE arm above)
21815 "ARRAY_REVERSE" => match target {
21816 DialectType::ClickHouse => {
21817 let mut new_f = *f;
21818 new_f.name = "arrayReverse".to_string();
21819 Ok(Expression::Function(Box::new(new_f)))
21820 }
21821 _ => Ok(Expression::Function(f)),
21822 },
21823 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
21824 "GENERATE_DATE_ARRAY" => {
21825 let mut args = f.args;
21826 if matches!(target, DialectType::BigQuery) {
21827 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
21828 if args.len() == 2 {
21829 let default_interval = Expression::Interval(Box::new(
21830 crate::expressions::Interval {
21831 this: Some(Expression::Literal(Box::new(
21832 Literal::String("1".to_string()),
21833 ))),
21834 unit: Some(
21835 crate::expressions::IntervalUnitSpec::Simple {
21836 unit: crate::expressions::IntervalUnit::Day,
21837 use_plural: false,
21838 },
21839 ),
21840 },
21841 ));
21842 args.push(default_interval);
21843 }
21844 Ok(Expression::Function(Box::new(Function::new(
21845 "GENERATE_DATE_ARRAY".to_string(),
21846 args,
21847 ))))
21848 } else if matches!(target, DialectType::DuckDB) {
21849 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
21850 let start = args.get(0).cloned();
21851 let end = args.get(1).cloned();
21852 let step = args.get(2).cloned().or_else(|| {
21853 Some(Expression::Interval(Box::new(
21854 crate::expressions::Interval {
21855 this: Some(Expression::Literal(Box::new(
21856 Literal::String("1".to_string()),
21857 ))),
21858 unit: Some(
21859 crate::expressions::IntervalUnitSpec::Simple {
21860 unit: crate::expressions::IntervalUnit::Day,
21861 use_plural: false,
21862 },
21863 ),
21864 },
21865 )))
21866 });
21867 let gen_series = Expression::GenerateSeries(Box::new(
21868 crate::expressions::GenerateSeries {
21869 start: start.map(Box::new),
21870 end: end.map(Box::new),
21871 step: step.map(Box::new),
21872 is_end_exclusive: None,
21873 },
21874 ));
21875 Ok(Expression::Cast(Box::new(Cast {
21876 this: gen_series,
21877 to: DataType::Array {
21878 element_type: Box::new(DataType::Date),
21879 dimension: None,
21880 },
21881 trailing_comments: vec![],
21882 double_colon_syntax: false,
21883 format: None,
21884 default: None,
21885 inferred_type: None,
21886 })))
21887 } else if matches!(
21888 target,
21889 DialectType::Presto | DialectType::Trino | DialectType::Athena
21890 ) {
21891 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
21892 let start = args.get(0).cloned();
21893 let end = args.get(1).cloned();
21894 let step = args.get(2).cloned().or_else(|| {
21895 Some(Expression::Interval(Box::new(
21896 crate::expressions::Interval {
21897 this: Some(Expression::Literal(Box::new(
21898 Literal::String("1".to_string()),
21899 ))),
21900 unit: Some(
21901 crate::expressions::IntervalUnitSpec::Simple {
21902 unit: crate::expressions::IntervalUnit::Day,
21903 use_plural: false,
21904 },
21905 ),
21906 },
21907 )))
21908 });
21909 let gen_series = Expression::GenerateSeries(Box::new(
21910 crate::expressions::GenerateSeries {
21911 start: start.map(Box::new),
21912 end: end.map(Box::new),
21913 step: step.map(Box::new),
21914 is_end_exclusive: None,
21915 },
21916 ));
21917 Ok(gen_series)
21918 } else if matches!(
21919 target,
21920 DialectType::Spark | DialectType::Databricks
21921 ) {
21922 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
21923 let start = args.get(0).cloned();
21924 let end = args.get(1).cloned();
21925 let step = args.get(2).cloned().or_else(|| {
21926 Some(Expression::Interval(Box::new(
21927 crate::expressions::Interval {
21928 this: Some(Expression::Literal(Box::new(
21929 Literal::String("1".to_string()),
21930 ))),
21931 unit: Some(
21932 crate::expressions::IntervalUnitSpec::Simple {
21933 unit: crate::expressions::IntervalUnit::Day,
21934 use_plural: false,
21935 },
21936 ),
21937 },
21938 )))
21939 });
21940 let gen_series = Expression::GenerateSeries(Box::new(
21941 crate::expressions::GenerateSeries {
21942 start: start.map(Box::new),
21943 end: end.map(Box::new),
21944 step: step.map(Box::new),
21945 is_end_exclusive: None,
21946 },
21947 ));
21948 Ok(gen_series)
21949 } else if matches!(target, DialectType::Snowflake) {
21950 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
21951 if args.len() == 2 {
21952 let default_interval = Expression::Interval(Box::new(
21953 crate::expressions::Interval {
21954 this: Some(Expression::Literal(Box::new(
21955 Literal::String("1".to_string()),
21956 ))),
21957 unit: Some(
21958 crate::expressions::IntervalUnitSpec::Simple {
21959 unit: crate::expressions::IntervalUnit::Day,
21960 use_plural: false,
21961 },
21962 ),
21963 },
21964 ));
21965 args.push(default_interval);
21966 }
21967 Ok(Expression::Function(Box::new(Function::new(
21968 "GENERATE_DATE_ARRAY".to_string(),
21969 args,
21970 ))))
21971 } else if matches!(
21972 target,
21973 DialectType::MySQL
21974 | DialectType::TSQL
21975 | DialectType::Fabric
21976 | DialectType::Redshift
21977 ) {
21978 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
21979 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
21980 Ok(Expression::Function(Box::new(Function::new(
21981 "GENERATE_DATE_ARRAY".to_string(),
21982 args,
21983 ))))
21984 } else {
21985 // PostgreSQL/others: convert to GenerateSeries
21986 let start = args.get(0).cloned();
21987 let end = args.get(1).cloned();
21988 let step = args.get(2).cloned().or_else(|| {
21989 Some(Expression::Interval(Box::new(
21990 crate::expressions::Interval {
21991 this: Some(Expression::Literal(Box::new(
21992 Literal::String("1".to_string()),
21993 ))),
21994 unit: Some(
21995 crate::expressions::IntervalUnitSpec::Simple {
21996 unit: crate::expressions::IntervalUnit::Day,
21997 use_plural: false,
21998 },
21999 ),
22000 },
22001 )))
22002 });
22003 Ok(Expression::GenerateSeries(Box::new(
22004 crate::expressions::GenerateSeries {
22005 start: start.map(Box::new),
22006 end: end.map(Box::new),
22007 step: step.map(Box::new),
22008 is_end_exclusive: None,
22009 },
22010 )))
22011 }
22012 }
22013 // ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
22014 // (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
22015 "ARRAYS_OVERLAP"
22016 if f.args.len() == 2
22017 && matches!(source, DialectType::Snowflake)
22018 && matches!(target, DialectType::DuckDB) =>
22019 {
22020 let mut args = f.args;
22021 let arr1 = args.remove(0);
22022 let arr2 = args.remove(0);
22023
22024 // (arr1 && arr2)
22025 let overlap = Expression::Paren(Box::new(Paren {
22026 this: Expression::ArrayOverlaps(Box::new(BinaryOp {
22027 left: arr1.clone(),
22028 right: arr2.clone(),
22029 left_comments: vec![],
22030 operator_comments: vec![],
22031 trailing_comments: vec![],
22032 inferred_type: None,
22033 })),
22034 trailing_comments: vec![],
22035 }));
22036
22037 // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
22038 let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
22039 left: Expression::Function(Box::new(Function::new(
22040 "ARRAY_LENGTH".to_string(),
22041 vec![arr1.clone()],
22042 ))),
22043 right: Expression::Function(Box::new(Function::new(
22044 "LIST_COUNT".to_string(),
22045 vec![arr1],
22046 ))),
22047 left_comments: vec![],
22048 operator_comments: vec![],
22049 trailing_comments: vec![],
22050 inferred_type: None,
22051 }));
22052
22053 // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
22054 let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
22055 left: Expression::Function(Box::new(Function::new(
22056 "ARRAY_LENGTH".to_string(),
22057 vec![arr2.clone()],
22058 ))),
22059 right: Expression::Function(Box::new(Function::new(
22060 "LIST_COUNT".to_string(),
22061 vec![arr2],
22062 ))),
22063 left_comments: vec![],
22064 operator_comments: vec![],
22065 trailing_comments: vec![],
22066 inferred_type: None,
22067 }));
22068
22069 // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
22070 let null_check = Expression::Paren(Box::new(Paren {
22071 this: Expression::And(Box::new(BinaryOp {
22072 left: arr1_has_null,
22073 right: arr2_has_null,
22074 left_comments: vec![],
22075 operator_comments: vec![],
22076 trailing_comments: vec![],
22077 inferred_type: None,
22078 })),
22079 trailing_comments: vec![],
22080 }));
22081
22082 // (arr1 && arr2) OR (null_check)
22083 Ok(Expression::Or(Box::new(BinaryOp {
22084 left: overlap,
22085 right: null_check,
22086 left_comments: vec![],
22087 operator_comments: vec![],
22088 trailing_comments: vec![],
22089 inferred_type: None,
22090 })))
22091 }
22092 // ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
22093 // Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
22094 "ARRAY_INTERSECTION"
22095 if f.args.len() == 2
22096 && matches!(source, DialectType::Snowflake)
22097 && matches!(target, DialectType::DuckDB) =>
22098 {
22099 let mut args = f.args;
22100 let arr1 = args.remove(0);
22101 let arr2 = args.remove(0);
22102
22103 // Build: arr1 IS NULL
22104 let arr1_is_null = Expression::IsNull(Box::new(IsNull {
22105 this: arr1.clone(),
22106 not: false,
22107 postfix_form: false,
22108 }));
22109 let arr2_is_null = Expression::IsNull(Box::new(IsNull {
22110 this: arr2.clone(),
22111 not: false,
22112 postfix_form: false,
22113 }));
22114 let null_check = Expression::Or(Box::new(BinaryOp {
22115 left: arr1_is_null,
22116 right: arr2_is_null,
22117 left_comments: vec![],
22118 operator_comments: vec![],
22119 trailing_comments: vec![],
22120 inferred_type: None,
22121 }));
22122
22123 // GENERATE_SERIES(1, LENGTH(arr1))
22124 let gen_series = Expression::Function(Box::new(Function::new(
22125 "GENERATE_SERIES".to_string(),
22126 vec![
22127 Expression::number(1),
22128 Expression::Function(Box::new(Function::new(
22129 "LENGTH".to_string(),
22130 vec![arr1.clone()],
22131 ))),
22132 ],
22133 )));
22134
22135 // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
22136 let list_zip = Expression::Function(Box::new(Function::new(
22137 "LIST_ZIP".to_string(),
22138 vec![arr1.clone(), gen_series],
22139 )));
22140
22141 // pair[1] and pair[2]
22142 let pair_col = Expression::column("pair");
22143 let pair_1 = Expression::Subscript(Box::new(
22144 crate::expressions::Subscript {
22145 this: pair_col.clone(),
22146 index: Expression::number(1),
22147 },
22148 ));
22149 let pair_2 = Expression::Subscript(Box::new(
22150 crate::expressions::Subscript {
22151 this: pair_col.clone(),
22152 index: Expression::number(2),
22153 },
22154 ));
22155
22156 // arr1[1:pair[2]]
22157 let arr1_slice = Expression::ArraySlice(Box::new(
22158 crate::expressions::ArraySlice {
22159 this: arr1.clone(),
22160 start: Some(Expression::number(1)),
22161 end: Some(pair_2),
22162 },
22163 ));
22164
22165 // e IS NOT DISTINCT FROM pair[1]
22166 let e_col = Expression::column("e");
22167 let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
22168 left: e_col.clone(),
22169 right: pair_1.clone(),
22170 left_comments: vec![],
22171 operator_comments: vec![],
22172 trailing_comments: vec![],
22173 inferred_type: None,
22174 }));
22175
22176 // e -> e IS NOT DISTINCT FROM pair[1]
22177 let inner_lambda1 =
22178 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22179 parameters: vec![crate::expressions::Identifier::new("e")],
22180 body: is_not_distinct,
22181 colon: false,
22182 parameter_types: vec![],
22183 }));
22184
22185 // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
22186 let inner_filter1 = Expression::Function(Box::new(Function::new(
22187 "LIST_FILTER".to_string(),
22188 vec![arr1_slice, inner_lambda1],
22189 )));
22190
22191 // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...))
22192 let len1 = Expression::Function(Box::new(Function::new(
22193 "LENGTH".to_string(),
22194 vec![inner_filter1],
22195 )));
22196
22197 // e -> e IS NOT DISTINCT FROM pair[1]
22198 let inner_lambda2 =
22199 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22200 parameters: vec![crate::expressions::Identifier::new("e")],
22201 body: Expression::NullSafeEq(Box::new(BinaryOp {
22202 left: e_col,
22203 right: pair_1.clone(),
22204 left_comments: vec![],
22205 operator_comments: vec![],
22206 trailing_comments: vec![],
22207 inferred_type: None,
22208 })),
22209 colon: false,
22210 parameter_types: vec![],
22211 }));
22212
22213 // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
22214 let inner_filter2 = Expression::Function(Box::new(Function::new(
22215 "LIST_FILTER".to_string(),
22216 vec![arr2.clone(), inner_lambda2],
22217 )));
22218
22219 // LENGTH(LIST_FILTER(arr2, ...))
22220 let len2 = Expression::Function(Box::new(Function::new(
22221 "LENGTH".to_string(),
22222 vec![inner_filter2],
22223 )));
22224
22225 // LENGTH(...) <= LENGTH(...)
22226 let cond = Expression::Paren(Box::new(Paren {
22227 this: Expression::Lte(Box::new(BinaryOp {
22228 left: len1,
22229 right: len2,
22230 left_comments: vec![],
22231 operator_comments: vec![],
22232 trailing_comments: vec![],
22233 inferred_type: None,
22234 })),
22235 trailing_comments: vec![],
22236 }));
22237
22238 // pair -> (condition)
22239 let filter_lambda =
22240 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22241 parameters: vec![crate::expressions::Identifier::new(
22242 "pair",
22243 )],
22244 body: cond,
22245 colon: false,
22246 parameter_types: vec![],
22247 }));
22248
22249 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
22250 let outer_filter = Expression::Function(Box::new(Function::new(
22251 "LIST_FILTER".to_string(),
22252 vec![list_zip, filter_lambda],
22253 )));
22254
22255 // pair -> pair[1]
22256 let transform_lambda =
22257 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22258 parameters: vec![crate::expressions::Identifier::new(
22259 "pair",
22260 )],
22261 body: pair_1,
22262 colon: false,
22263 parameter_types: vec![],
22264 }));
22265
22266 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
22267 let list_transform = Expression::Function(Box::new(Function::new(
22268 "LIST_TRANSFORM".to_string(),
22269 vec![outer_filter, transform_lambda],
22270 )));
22271
22272 // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
22273 // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
22274 // END
22275 Ok(Expression::Case(Box::new(Case {
22276 operand: None,
22277 whens: vec![(null_check, Expression::Null(Null))],
22278 else_: Some(list_transform),
22279 comments: vec![],
22280 inferred_type: None,
22281 })))
22282 }
22283 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
22284 "ARRAY_CONSTRUCT" => {
22285 if matches!(target, DialectType::Snowflake) {
22286 Ok(Expression::Function(f))
22287 } else {
22288 Ok(Expression::Array(Box::new(crate::expressions::Array {
22289 expressions: f.args,
22290 })))
22291 }
22292 }
22293 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
22294 "ARRAY"
22295 if !f.args.iter().any(|a| {
22296 matches!(a, Expression::Select(_) | Expression::Subquery(_))
22297 }) =>
22298 {
22299 match target {
22300 DialectType::DuckDB
22301 | DialectType::Snowflake
22302 | DialectType::Presto
22303 | DialectType::Trino
22304 | DialectType::Athena => {
22305 Ok(Expression::Array(Box::new(crate::expressions::Array {
22306 expressions: f.args,
22307 })))
22308 }
22309 _ => Ok(Expression::Function(f)),
22310 }
22311 }
22312 _ => Ok(Expression::Function(f)),
22313 }
22314 } else if let Expression::AggregateFunction(mut af) = e {
22315 let name = af.name.to_ascii_uppercase();
22316 match name.as_str() {
22317 "ARBITRARY" if af.args.len() == 1 => {
22318 let arg = af.args.into_iter().next().unwrap();
22319 Ok(convert_arbitrary(arg, target))
22320 }
22321 "JSON_ARRAYAGG" => {
22322 match target {
22323 DialectType::PostgreSQL => {
22324 af.name = "JSON_AGG".to_string();
22325 // Add NULLS FIRST to ORDER BY items for PostgreSQL
22326 for ordered in af.order_by.iter_mut() {
22327 if ordered.nulls_first.is_none() {
22328 ordered.nulls_first = Some(true);
22329 }
22330 }
22331 Ok(Expression::AggregateFunction(af))
22332 }
22333 _ => Ok(Expression::AggregateFunction(af)),
22334 }
22335 }
22336 _ => Ok(Expression::AggregateFunction(af)),
22337 }
22338 } else if let Expression::JSONArrayAgg(ja) = e {
22339 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
22340 match target {
22341 DialectType::PostgreSQL => {
22342 let mut order_by = Vec::new();
22343 if let Some(order_expr) = ja.order {
22344 if let Expression::OrderBy(ob) = *order_expr {
22345 for mut ordered in ob.expressions {
22346 if ordered.nulls_first.is_none() {
22347 ordered.nulls_first = Some(true);
22348 }
22349 order_by.push(ordered);
22350 }
22351 }
22352 }
22353 Ok(Expression::AggregateFunction(Box::new(
22354 crate::expressions::AggregateFunction {
22355 name: "JSON_AGG".to_string(),
22356 args: vec![*ja.this],
22357 distinct: false,
22358 filter: None,
22359 order_by,
22360 limit: None,
22361 ignore_nulls: None,
22362 inferred_type: None,
22363 },
22364 )))
22365 }
22366 _ => Ok(Expression::JSONArrayAgg(ja)),
22367 }
22368 } else if let Expression::JSONArray(ja) = e {
22369 match target {
22370 DialectType::Snowflake
22371 if ja.null_handling.is_none()
22372 && ja.return_type.is_none()
22373 && ja.strict.is_none() =>
22374 {
22375 let array_construct = Expression::ArrayFunc(Box::new(
22376 crate::expressions::ArrayConstructor {
22377 expressions: ja.expressions,
22378 bracket_notation: false,
22379 use_list_keyword: false,
22380 },
22381 ));
22382 Ok(Expression::Function(Box::new(Function::new(
22383 "TO_VARIANT".to_string(),
22384 vec![array_construct],
22385 ))))
22386 }
22387 _ => Ok(Expression::JSONArray(ja)),
22388 }
22389 } else if let Expression::JsonArray(f) = e {
22390 match target {
22391 DialectType::Snowflake => {
22392 let array_construct = Expression::ArrayFunc(Box::new(
22393 crate::expressions::ArrayConstructor {
22394 expressions: f.expressions,
22395 bracket_notation: false,
22396 use_list_keyword: false,
22397 },
22398 ));
22399 Ok(Expression::Function(Box::new(Function::new(
22400 "TO_VARIANT".to_string(),
22401 vec![array_construct],
22402 ))))
22403 }
22404 _ => Ok(Expression::JsonArray(f)),
22405 }
22406 } else if let Expression::CombinedParameterizedAgg(cpa) = e {
22407 let function_name = match cpa.this.as_ref() {
22408 Expression::Identifier(ident) => Some(ident.name.as_str()),
22409 _ => None,
22410 };
22411 match function_name {
22412 Some(name)
22413 if name.eq_ignore_ascii_case("groupConcat")
22414 && cpa.expressions.len() == 1 =>
22415 {
22416 match target {
22417 DialectType::MySQL | DialectType::SingleStore => {
22418 let this = cpa.expressions[0].clone();
22419 let separator = cpa.params.first().cloned();
22420 Ok(Expression::GroupConcat(Box::new(
22421 crate::expressions::GroupConcatFunc {
22422 this,
22423 separator,
22424 order_by: None,
22425 distinct: false,
22426 filter: None,
22427 limit: None,
22428 inferred_type: None,
22429 },
22430 )))
22431 }
22432 DialectType::DuckDB => Ok(Expression::ListAgg(Box::new({
22433 let this = cpa.expressions[0].clone();
22434 let separator = cpa.params.first().cloned();
22435 crate::expressions::ListAggFunc {
22436 this,
22437 separator,
22438 on_overflow: None,
22439 order_by: None,
22440 distinct: false,
22441 filter: None,
22442 inferred_type: None,
22443 }
22444 }))),
22445 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22446 }
22447 }
22448 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22449 }
22450 } else if let Expression::ToNumber(tn) = e {
22451 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
22452 let arg = *tn.this;
22453 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
22454 this: arg,
22455 to: crate::expressions::DataType::Double {
22456 precision: None,
22457 scale: None,
22458 },
22459 double_colon_syntax: false,
22460 trailing_comments: Vec::new(),
22461 format: None,
22462 default: None,
22463 inferred_type: None,
22464 })))
22465 } else {
22466 Ok(e)
22467 }
22468 }
22469
22470 Action::RegexpLikeToDuckDB => {
22471 if let Expression::RegexpLike(f) = e {
22472 let mut args = vec![f.this, f.pattern];
22473 if let Some(flags) = f.flags {
22474 args.push(flags);
22475 }
22476 Ok(Expression::Function(Box::new(Function::new(
22477 "REGEXP_MATCHES".to_string(),
22478 args,
22479 ))))
22480 } else {
22481 Ok(e)
22482 }
22483 }
22484 Action::EpochConvert => {
22485 if let Expression::Epoch(f) = e {
22486 let arg = f.this;
22487 let name = match target {
22488 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
22489 "UNIX_TIMESTAMP"
22490 }
22491 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
22492 DialectType::BigQuery => "TIME_TO_UNIX",
22493 _ => "EPOCH",
22494 };
22495 Ok(Expression::Function(Box::new(Function::new(
22496 name.to_string(),
22497 vec![arg],
22498 ))))
22499 } else {
22500 Ok(e)
22501 }
22502 }
22503 Action::EpochMsConvert => {
22504 use crate::expressions::{BinaryOp, Cast};
22505 if let Expression::EpochMs(f) = e {
22506 let arg = f.this;
22507 match target {
22508 DialectType::Spark | DialectType::Databricks => {
22509 Ok(Expression::Function(Box::new(Function::new(
22510 "TIMESTAMP_MILLIS".to_string(),
22511 vec![arg],
22512 ))))
22513 }
22514 DialectType::BigQuery => Ok(Expression::Function(Box::new(
22515 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
22516 ))),
22517 DialectType::Presto | DialectType::Trino => {
22518 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
22519 let cast_arg = Expression::Cast(Box::new(Cast {
22520 this: arg,
22521 to: DataType::Double {
22522 precision: None,
22523 scale: None,
22524 },
22525 trailing_comments: Vec::new(),
22526 double_colon_syntax: false,
22527 format: None,
22528 default: None,
22529 inferred_type: None,
22530 }));
22531 let div = Expression::Div(Box::new(BinaryOp::new(
22532 cast_arg,
22533 Expression::Function(Box::new(Function::new(
22534 "POW".to_string(),
22535 vec![Expression::number(10), Expression::number(3)],
22536 ))),
22537 )));
22538 Ok(Expression::Function(Box::new(Function::new(
22539 "FROM_UNIXTIME".to_string(),
22540 vec![div],
22541 ))))
22542 }
22543 DialectType::MySQL => {
22544 // FROM_UNIXTIME(x / POWER(10, 3))
22545 let div = Expression::Div(Box::new(BinaryOp::new(
22546 arg,
22547 Expression::Function(Box::new(Function::new(
22548 "POWER".to_string(),
22549 vec![Expression::number(10), Expression::number(3)],
22550 ))),
22551 )));
22552 Ok(Expression::Function(Box::new(Function::new(
22553 "FROM_UNIXTIME".to_string(),
22554 vec![div],
22555 ))))
22556 }
22557 DialectType::PostgreSQL | DialectType::Redshift => {
22558 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
22559 let cast_arg = Expression::Cast(Box::new(Cast {
22560 this: arg,
22561 to: DataType::Custom {
22562 name: "DOUBLE PRECISION".to_string(),
22563 },
22564 trailing_comments: Vec::new(),
22565 double_colon_syntax: false,
22566 format: None,
22567 default: None,
22568 inferred_type: None,
22569 }));
22570 let div = Expression::Div(Box::new(BinaryOp::new(
22571 cast_arg,
22572 Expression::Function(Box::new(Function::new(
22573 "POWER".to_string(),
22574 vec![Expression::number(10), Expression::number(3)],
22575 ))),
22576 )));
22577 Ok(Expression::Function(Box::new(Function::new(
22578 "TO_TIMESTAMP".to_string(),
22579 vec![div],
22580 ))))
22581 }
22582 DialectType::ClickHouse => {
22583 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
22584 let cast_arg = Expression::Cast(Box::new(Cast {
22585 this: arg,
22586 to: DataType::Nullable {
22587 inner: Box::new(DataType::BigInt { length: None }),
22588 },
22589 trailing_comments: Vec::new(),
22590 double_colon_syntax: false,
22591 format: None,
22592 default: None,
22593 inferred_type: None,
22594 }));
22595 Ok(Expression::Function(Box::new(Function::new(
22596 "fromUnixTimestamp64Milli".to_string(),
22597 vec![cast_arg],
22598 ))))
22599 }
22600 _ => Ok(Expression::Function(Box::new(Function::new(
22601 "EPOCH_MS".to_string(),
22602 vec![arg],
22603 )))),
22604 }
22605 } else {
22606 Ok(e)
22607 }
22608 }
22609 Action::TSQLTypeNormalize => {
22610 if let Expression::DataType(dt) = e {
22611 let new_dt = match &dt {
22612 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
22613 DataType::Decimal {
22614 precision: Some(15),
22615 scale: Some(4),
22616 }
22617 }
22618 DataType::Custom { name }
22619 if name.eq_ignore_ascii_case("SMALLMONEY") =>
22620 {
22621 DataType::Decimal {
22622 precision: Some(6),
22623 scale: Some(4),
22624 }
22625 }
22626 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
22627 DataType::Timestamp {
22628 timezone: false,
22629 precision: None,
22630 }
22631 }
22632 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
22633 DataType::Float {
22634 precision: None,
22635 scale: None,
22636 real_spelling: false,
22637 }
22638 }
22639 DataType::Float {
22640 real_spelling: true,
22641 ..
22642 } => DataType::Float {
22643 precision: None,
22644 scale: None,
22645 real_spelling: false,
22646 },
22647 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
22648 DataType::Custom {
22649 name: "BLOB".to_string(),
22650 }
22651 }
22652 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
22653 DataType::Boolean
22654 }
22655 DataType::Custom { name }
22656 if name.eq_ignore_ascii_case("ROWVERSION") =>
22657 {
22658 DataType::Custom {
22659 name: "BINARY".to_string(),
22660 }
22661 }
22662 DataType::Custom { name }
22663 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
22664 {
22665 match target {
22666 DialectType::Spark
22667 | DialectType::Databricks
22668 | DialectType::Hive => DataType::Custom {
22669 name: "STRING".to_string(),
22670 },
22671 _ => DataType::VarChar {
22672 length: Some(36),
22673 parenthesized_length: true,
22674 },
22675 }
22676 }
22677 DataType::Custom { name }
22678 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
22679 {
22680 match target {
22681 DialectType::Spark
22682 | DialectType::Databricks
22683 | DialectType::Hive => DataType::Timestamp {
22684 timezone: false,
22685 precision: None,
22686 },
22687 _ => DataType::Timestamp {
22688 timezone: true,
22689 precision: None,
22690 },
22691 }
22692 }
22693 DataType::Custom { ref name }
22694 if name.len() >= 10
22695 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
22696 {
22697 // DATETIME2(n) -> TIMESTAMP
22698 DataType::Timestamp {
22699 timezone: false,
22700 precision: None,
22701 }
22702 }
22703 DataType::Custom { ref name }
22704 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
22705 {
22706 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
22707 match target {
22708 DialectType::Spark
22709 | DialectType::Databricks
22710 | DialectType::Hive => DataType::Timestamp {
22711 timezone: false,
22712 precision: None,
22713 },
22714 _ => return Ok(Expression::DataType(dt)),
22715 }
22716 }
22717 DataType::Custom { ref name }
22718 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
22719 {
22720 // Parse NUMERIC(p,s) back to Decimal(p,s)
22721 let upper = name.to_ascii_uppercase();
22722 if let Some(inner) = upper
22723 .strip_prefix("NUMERIC(")
22724 .and_then(|s| s.strip_suffix(')'))
22725 {
22726 let parts: Vec<&str> = inner.split(',').collect();
22727 let precision =
22728 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
22729 let scale =
22730 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
22731 DataType::Decimal { precision, scale }
22732 } else if upper == "NUMERIC" {
22733 DataType::Decimal {
22734 precision: None,
22735 scale: None,
22736 }
22737 } else {
22738 return Ok(Expression::DataType(dt));
22739 }
22740 }
22741 DataType::Float {
22742 precision: Some(p), ..
22743 } => {
22744 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
22745 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
22746 let boundary = match target {
22747 DialectType::Hive
22748 | DialectType::Spark
22749 | DialectType::Databricks => 32,
22750 _ => 24,
22751 };
22752 if *p <= boundary {
22753 DataType::Float {
22754 precision: None,
22755 scale: None,
22756 real_spelling: false,
22757 }
22758 } else {
22759 DataType::Double {
22760 precision: None,
22761 scale: None,
22762 }
22763 }
22764 }
22765 DataType::TinyInt { .. } => match target {
22766 DialectType::DuckDB => DataType::Custom {
22767 name: "UTINYINT".to_string(),
22768 },
22769 DialectType::Hive
22770 | DialectType::Spark
22771 | DialectType::Databricks => DataType::SmallInt { length: None },
22772 _ => return Ok(Expression::DataType(dt)),
22773 },
22774 // INTEGER -> INT for Spark/Databricks
22775 DataType::Int {
22776 length,
22777 integer_spelling: true,
22778 } => DataType::Int {
22779 length: *length,
22780 integer_spelling: false,
22781 },
22782 _ => return Ok(Expression::DataType(dt)),
22783 };
22784 Ok(Expression::DataType(new_dt))
22785 } else {
22786 Ok(e)
22787 }
22788 }
22789 Action::MySQLSafeDivide => {
22790 use crate::expressions::{BinaryOp, Cast};
22791 if let Expression::Div(op) = e {
22792 let left = op.left;
22793 let right = op.right;
22794 // For SQLite: CAST left as REAL but NO NULLIF wrapping
22795 if matches!(target, DialectType::SQLite) {
22796 let new_left = Expression::Cast(Box::new(Cast {
22797 this: left,
22798 to: DataType::Float {
22799 precision: None,
22800 scale: None,
22801 real_spelling: true,
22802 },
22803 trailing_comments: Vec::new(),
22804 double_colon_syntax: false,
22805 format: None,
22806 default: None,
22807 inferred_type: None,
22808 }));
22809 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
22810 }
22811 // Wrap right in NULLIF(right, 0)
22812 let nullif_right = Expression::Function(Box::new(Function::new(
22813 "NULLIF".to_string(),
22814 vec![right, Expression::number(0)],
22815 )));
22816 // For some dialects, also CAST the left side
22817 let new_left = match target {
22818 DialectType::PostgreSQL
22819 | DialectType::Redshift
22820 | DialectType::Teradata
22821 | DialectType::Materialize
22822 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
22823 this: left,
22824 to: DataType::Custom {
22825 name: "DOUBLE PRECISION".to_string(),
22826 },
22827 trailing_comments: Vec::new(),
22828 double_colon_syntax: false,
22829 format: None,
22830 default: None,
22831 inferred_type: None,
22832 })),
22833 DialectType::Drill
22834 | DialectType::Trino
22835 | DialectType::Presto
22836 | DialectType::Athena => Expression::Cast(Box::new(Cast {
22837 this: left,
22838 to: DataType::Double {
22839 precision: None,
22840 scale: None,
22841 },
22842 trailing_comments: Vec::new(),
22843 double_colon_syntax: false,
22844 format: None,
22845 default: None,
22846 inferred_type: None,
22847 })),
22848 DialectType::TSQL => Expression::Cast(Box::new(Cast {
22849 this: left,
22850 to: DataType::Float {
22851 precision: None,
22852 scale: None,
22853 real_spelling: false,
22854 },
22855 trailing_comments: Vec::new(),
22856 double_colon_syntax: false,
22857 format: None,
22858 default: None,
22859 inferred_type: None,
22860 })),
22861 _ => left,
22862 };
22863 Ok(Expression::Div(Box::new(BinaryOp::new(
22864 new_left,
22865 nullif_right,
22866 ))))
22867 } else {
22868 Ok(e)
22869 }
22870 }
22871 Action::AlterTableRenameStripSchema => {
22872 if let Expression::AlterTable(mut at) = e {
22873 if let Some(crate::expressions::AlterTableAction::RenameTable(
22874 ref mut new_tbl,
22875 )) = at.actions.first_mut()
22876 {
22877 new_tbl.schema = None;
22878 new_tbl.catalog = None;
22879 }
22880 Ok(Expression::AlterTable(at))
22881 } else {
22882 Ok(e)
22883 }
22884 }
22885 Action::NullsOrdering => {
22886 // Fill in the source dialect's implied null ordering default.
22887 // This makes implicit null ordering explicit so the target generator
22888 // can correctly strip or keep it.
22889 //
22890 // Dialect null ordering categories:
22891 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
22892 // ASC -> NULLS LAST, DESC -> NULLS FIRST
22893 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
22894 // ASC -> NULLS FIRST, DESC -> NULLS LAST
22895 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
22896 // NULLS LAST always (both ASC and DESC)
22897 if let Expression::Ordered(mut o) = e {
22898 let is_asc = !o.desc;
22899
22900 let is_source_nulls_large = matches!(
22901 source,
22902 DialectType::Oracle
22903 | DialectType::PostgreSQL
22904 | DialectType::Redshift
22905 | DialectType::Snowflake
22906 );
22907 let is_source_nulls_last = matches!(
22908 source,
22909 DialectType::DuckDB
22910 | DialectType::Presto
22911 | DialectType::Trino
22912 | DialectType::Dremio
22913 | DialectType::Athena
22914 | DialectType::ClickHouse
22915 | DialectType::Drill
22916 | DialectType::Exasol
22917 | DialectType::DataFusion
22918 );
22919
22920 // Determine target category to check if default matches
22921 let is_target_nulls_large = matches!(
22922 target,
22923 DialectType::Oracle
22924 | DialectType::PostgreSQL
22925 | DialectType::Redshift
22926 | DialectType::Snowflake
22927 );
22928 let is_target_nulls_last = matches!(
22929 target,
22930 DialectType::DuckDB
22931 | DialectType::Presto
22932 | DialectType::Trino
22933 | DialectType::Dremio
22934 | DialectType::Athena
22935 | DialectType::ClickHouse
22936 | DialectType::Drill
22937 | DialectType::Exasol
22938 | DialectType::DataFusion
22939 );
22940
22941 // Compute the implied nulls_first for source
22942 let source_nulls_first = if is_source_nulls_large {
22943 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
22944 } else if is_source_nulls_last {
22945 false // NULLS LAST always
22946 } else {
22947 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
22948 };
22949
22950 // Compute the target's default
22951 let target_nulls_first = if is_target_nulls_large {
22952 !is_asc
22953 } else if is_target_nulls_last {
22954 false
22955 } else {
22956 is_asc
22957 };
22958
22959 // Only add explicit nulls ordering if source and target defaults differ
22960 if source_nulls_first != target_nulls_first {
22961 o.nulls_first = Some(source_nulls_first);
22962 }
22963 // If they match, leave nulls_first as None so the generator won't output it
22964
22965 Ok(Expression::Ordered(o))
22966 } else {
22967 Ok(e)
22968 }
22969 }
22970 Action::StringAggConvert => {
22971 match e {
22972 Expression::WithinGroup(wg) => {
22973 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
22974 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
22975 let (x_opt, sep_opt, distinct) = match wg.this {
22976 Expression::AggregateFunction(ref af)
22977 if af.name.eq_ignore_ascii_case("STRING_AGG")
22978 && af.args.len() >= 2 =>
22979 {
22980 (
22981 Some(af.args[0].clone()),
22982 Some(af.args[1].clone()),
22983 af.distinct,
22984 )
22985 }
22986 Expression::Function(ref f)
22987 if f.name.eq_ignore_ascii_case("STRING_AGG")
22988 && f.args.len() >= 2 =>
22989 {
22990 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
22991 }
22992 Expression::StringAgg(ref sa) => {
22993 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
22994 }
22995 _ => (None, None, false),
22996 };
22997 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
22998 let order_by = wg.order_by;
22999
23000 match target {
23001 DialectType::TSQL | DialectType::Fabric => {
23002 // Keep as WithinGroup(StringAgg) for TSQL
23003 Ok(Expression::WithinGroup(Box::new(
23004 crate::expressions::WithinGroup {
23005 this: Expression::StringAgg(Box::new(
23006 crate::expressions::StringAggFunc {
23007 this: x,
23008 separator: Some(sep),
23009 order_by: None, // order_by goes in WithinGroup, not StringAgg
23010 distinct,
23011 filter: None,
23012 limit: None,
23013 inferred_type: None,
23014 },
23015 )),
23016 order_by,
23017 },
23018 )))
23019 }
23020 DialectType::MySQL
23021 | DialectType::SingleStore
23022 | DialectType::Doris
23023 | DialectType::StarRocks => {
23024 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
23025 Ok(Expression::GroupConcat(Box::new(
23026 crate::expressions::GroupConcatFunc {
23027 this: x,
23028 separator: Some(sep),
23029 order_by: Some(order_by),
23030 distinct,
23031 filter: None,
23032 limit: None,
23033 inferred_type: None,
23034 },
23035 )))
23036 }
23037 DialectType::SQLite => {
23038 // GROUP_CONCAT(x, sep) - no ORDER BY support
23039 Ok(Expression::GroupConcat(Box::new(
23040 crate::expressions::GroupConcatFunc {
23041 this: x,
23042 separator: Some(sep),
23043 order_by: None,
23044 distinct,
23045 filter: None,
23046 limit: None,
23047 inferred_type: None,
23048 },
23049 )))
23050 }
23051 DialectType::PostgreSQL | DialectType::Redshift => {
23052 // STRING_AGG(x, sep ORDER BY z)
23053 Ok(Expression::StringAgg(Box::new(
23054 crate::expressions::StringAggFunc {
23055 this: x,
23056 separator: Some(sep),
23057 order_by: Some(order_by),
23058 distinct,
23059 filter: None,
23060 limit: None,
23061 inferred_type: None,
23062 },
23063 )))
23064 }
23065 _ => {
23066 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
23067 Ok(Expression::StringAgg(Box::new(
23068 crate::expressions::StringAggFunc {
23069 this: x,
23070 separator: Some(sep),
23071 order_by: Some(order_by),
23072 distinct,
23073 filter: None,
23074 limit: None,
23075 inferred_type: None,
23076 },
23077 )))
23078 }
23079 }
23080 } else {
23081 Ok(Expression::WithinGroup(wg))
23082 }
23083 }
23084 Expression::StringAgg(sa) => {
23085 match target {
23086 DialectType::MySQL
23087 | DialectType::SingleStore
23088 | DialectType::Doris
23089 | DialectType::StarRocks => {
23090 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
23091 Ok(Expression::GroupConcat(Box::new(
23092 crate::expressions::GroupConcatFunc {
23093 this: sa.this,
23094 separator: sa.separator,
23095 order_by: sa.order_by,
23096 distinct: sa.distinct,
23097 filter: sa.filter,
23098 limit: None,
23099 inferred_type: None,
23100 },
23101 )))
23102 }
23103 DialectType::SQLite => {
23104 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
23105 Ok(Expression::GroupConcat(Box::new(
23106 crate::expressions::GroupConcatFunc {
23107 this: sa.this,
23108 separator: sa.separator,
23109 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
23110 distinct: sa.distinct,
23111 filter: sa.filter,
23112 limit: None,
23113 inferred_type: None,
23114 },
23115 )))
23116 }
23117 DialectType::Spark | DialectType::Databricks => {
23118 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
23119 Ok(Expression::ListAgg(Box::new(
23120 crate::expressions::ListAggFunc {
23121 this: sa.this,
23122 separator: sa.separator,
23123 on_overflow: None,
23124 order_by: sa.order_by,
23125 distinct: sa.distinct,
23126 filter: None,
23127 inferred_type: None,
23128 },
23129 )))
23130 }
23131 _ => Ok(Expression::StringAgg(sa)),
23132 }
23133 }
23134 _ => Ok(e),
23135 }
23136 }
23137 Action::GroupConcatConvert => {
23138 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
23139 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
23140 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
23141 if let Expression::Function(ref f) = expr {
23142 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
23143 let mut result = f.args[0].clone();
23144 for arg in &f.args[1..] {
23145 result = Expression::Concat(Box::new(BinaryOp {
23146 left: result,
23147 right: arg.clone(),
23148 left_comments: vec![],
23149 operator_comments: vec![],
23150 trailing_comments: vec![],
23151 inferred_type: None,
23152 }));
23153 }
23154 return result;
23155 }
23156 }
23157 expr
23158 }
23159 fn expand_concat_to_plus(expr: Expression) -> Expression {
23160 if let Expression::Function(ref f) = expr {
23161 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
23162 let mut result = f.args[0].clone();
23163 for arg in &f.args[1..] {
23164 result = Expression::Add(Box::new(BinaryOp {
23165 left: result,
23166 right: arg.clone(),
23167 left_comments: vec![],
23168 operator_comments: vec![],
23169 trailing_comments: vec![],
23170 inferred_type: None,
23171 }));
23172 }
23173 return result;
23174 }
23175 }
23176 expr
23177 }
23178 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
23179 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
23180 if let Expression::Function(ref f) = expr {
23181 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
23182 let new_args: Vec<Expression> = f
23183 .args
23184 .iter()
23185 .map(|arg| {
23186 Expression::Cast(Box::new(crate::expressions::Cast {
23187 this: arg.clone(),
23188 to: crate::expressions::DataType::VarChar {
23189 length: None,
23190 parenthesized_length: false,
23191 },
23192 trailing_comments: Vec::new(),
23193 double_colon_syntax: false,
23194 format: None,
23195 default: None,
23196 inferred_type: None,
23197 }))
23198 })
23199 .collect();
23200 return Expression::Function(Box::new(
23201 crate::expressions::Function::new(
23202 "CONCAT".to_string(),
23203 new_args,
23204 ),
23205 ));
23206 }
23207 }
23208 expr
23209 }
23210 if let Expression::GroupConcat(gc) = e {
23211 match target {
23212 DialectType::Presto => {
23213 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
23214 let sep = gc.separator.unwrap_or(Expression::string(","));
23215 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
23216 let this = wrap_concat_args_in_varchar_cast(gc.this);
23217 let array_agg =
23218 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
23219 this,
23220 distinct: gc.distinct,
23221 filter: gc.filter,
23222 order_by: gc.order_by.unwrap_or_default(),
23223 name: None,
23224 ignore_nulls: None,
23225 having_max: None,
23226 limit: None,
23227 inferred_type: None,
23228 }));
23229 Ok(Expression::ArrayJoin(Box::new(
23230 crate::expressions::ArrayJoinFunc {
23231 this: array_agg,
23232 separator: sep,
23233 null_replacement: None,
23234 },
23235 )))
23236 }
23237 DialectType::Trino => {
23238 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
23239 let sep = gc.separator.unwrap_or(Expression::string(","));
23240 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
23241 let this = wrap_concat_args_in_varchar_cast(gc.this);
23242 Ok(Expression::ListAgg(Box::new(
23243 crate::expressions::ListAggFunc {
23244 this,
23245 separator: Some(sep),
23246 on_overflow: None,
23247 order_by: gc.order_by,
23248 distinct: gc.distinct,
23249 filter: gc.filter,
23250 inferred_type: None,
23251 },
23252 )))
23253 }
23254 DialectType::PostgreSQL
23255 | DialectType::Redshift
23256 | DialectType::Snowflake
23257 | DialectType::DuckDB
23258 | DialectType::Hive
23259 | DialectType::ClickHouse => {
23260 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
23261 let sep = gc.separator.unwrap_or(Expression::string(","));
23262 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
23263 let this = expand_concat_to_dpipe(gc.this);
23264 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
23265 let order_by = if target == DialectType::PostgreSQL {
23266 gc.order_by.map(|ords| {
23267 ords.into_iter()
23268 .map(|mut o| {
23269 if o.nulls_first.is_none() {
23270 if o.desc {
23271 o.nulls_first = Some(false);
23272 // NULLS LAST
23273 } else {
23274 o.nulls_first = Some(true);
23275 // NULLS FIRST
23276 }
23277 }
23278 o
23279 })
23280 .collect()
23281 })
23282 } else {
23283 gc.order_by
23284 };
23285 Ok(Expression::StringAgg(Box::new(
23286 crate::expressions::StringAggFunc {
23287 this,
23288 separator: Some(sep),
23289 order_by,
23290 distinct: gc.distinct,
23291 filter: gc.filter,
23292 limit: None,
23293 inferred_type: None,
23294 },
23295 )))
23296 }
23297 DialectType::TSQL => {
23298 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
23299 // TSQL doesn't support DISTINCT in STRING_AGG
23300 let sep = gc.separator.unwrap_or(Expression::string(","));
23301 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
23302 let this = expand_concat_to_plus(gc.this);
23303 Ok(Expression::StringAgg(Box::new(
23304 crate::expressions::StringAggFunc {
23305 this,
23306 separator: Some(sep),
23307 order_by: gc.order_by,
23308 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
23309 filter: gc.filter,
23310 limit: None,
23311 inferred_type: None,
23312 },
23313 )))
23314 }
23315 DialectType::SQLite => {
23316 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
23317 // SQLite GROUP_CONCAT doesn't support ORDER BY
23318 // Expand CONCAT(a,b,c) -> a || b || c
23319 let this = expand_concat_to_dpipe(gc.this);
23320 Ok(Expression::GroupConcat(Box::new(
23321 crate::expressions::GroupConcatFunc {
23322 this,
23323 separator: gc.separator,
23324 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
23325 distinct: gc.distinct,
23326 filter: gc.filter,
23327 limit: None,
23328 inferred_type: None,
23329 },
23330 )))
23331 }
23332 DialectType::Spark | DialectType::Databricks => {
23333 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
23334 let sep = gc.separator.unwrap_or(Expression::string(","));
23335 Ok(Expression::ListAgg(Box::new(
23336 crate::expressions::ListAggFunc {
23337 this: gc.this,
23338 separator: Some(sep),
23339 on_overflow: None,
23340 order_by: gc.order_by,
23341 distinct: gc.distinct,
23342 filter: None,
23343 inferred_type: None,
23344 },
23345 )))
23346 }
23347 DialectType::MySQL
23348 | DialectType::SingleStore
23349 | DialectType::StarRocks => {
23350 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
23351 if gc.separator.is_none() {
23352 let mut gc = gc;
23353 gc.separator = Some(Expression::string(","));
23354 Ok(Expression::GroupConcat(gc))
23355 } else {
23356 Ok(Expression::GroupConcat(gc))
23357 }
23358 }
23359 _ => Ok(Expression::GroupConcat(gc)),
23360 }
23361 } else {
23362 Ok(e)
23363 }
23364 }
23365 Action::TempTableHash => {
23366 match e {
23367 Expression::CreateTable(mut ct) => {
23368 // TSQL #table -> TEMPORARY TABLE with # stripped from name
23369 let name = &ct.name.name.name;
23370 if name.starts_with('#') {
23371 ct.name.name.name = name.trim_start_matches('#').to_string();
23372 }
23373 // Set temporary flag
23374 ct.temporary = true;
23375 Ok(Expression::CreateTable(ct))
23376 }
23377 Expression::Table(mut tr) => {
23378 // Strip # from table references
23379 let name = &tr.name.name;
23380 if name.starts_with('#') {
23381 tr.name.name = name.trim_start_matches('#').to_string();
23382 }
23383 Ok(Expression::Table(tr))
23384 }
23385 Expression::DropTable(mut dt) => {
23386 // Strip # from DROP TABLE names
23387 for table_ref in &mut dt.names {
23388 if table_ref.name.name.starts_with('#') {
23389 table_ref.name.name =
23390 table_ref.name.name.trim_start_matches('#').to_string();
23391 }
23392 }
23393 Ok(Expression::DropTable(dt))
23394 }
23395 _ => Ok(e),
23396 }
23397 }
23398 Action::NvlClearOriginal => {
23399 if let Expression::Nvl(mut f) = e {
23400 f.original_name = None;
23401 Ok(Expression::Nvl(f))
23402 } else {
23403 Ok(e)
23404 }
23405 }
23406 Action::HiveCastToTryCast => {
23407 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
23408 if let Expression::Cast(mut c) = e {
23409 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
23410 // (Spark's TIMESTAMP is always timezone-aware)
23411 if matches!(target, DialectType::DuckDB)
23412 && matches!(source, DialectType::Spark | DialectType::Databricks)
23413 && matches!(
23414 c.to,
23415 DataType::Timestamp {
23416 timezone: false,
23417 ..
23418 }
23419 )
23420 {
23421 c.to = DataType::Custom {
23422 name: "TIMESTAMPTZ".to_string(),
23423 };
23424 }
23425 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
23426 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
23427 if matches!(target, DialectType::Databricks | DialectType::Spark)
23428 && matches!(
23429 source,
23430 DialectType::Spark | DialectType::Databricks | DialectType::Hive
23431 )
23432 && Self::has_varchar_char_type(&c.to)
23433 {
23434 c.to = Self::normalize_varchar_to_string(c.to);
23435 }
23436 Ok(Expression::TryCast(c))
23437 } else {
23438 Ok(e)
23439 }
23440 }
23441 Action::XorExpand => {
23442 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
23443 // Snowflake: use BOOLXOR(a, b) instead
23444 if let Expression::Xor(xor) = e {
23445 // Collect all XOR operands
23446 let mut operands = Vec::new();
23447 if let Some(this) = xor.this {
23448 operands.push(*this);
23449 }
23450 if let Some(expr) = xor.expression {
23451 operands.push(*expr);
23452 }
23453 operands.extend(xor.expressions);
23454
23455 // Snowflake: use BOOLXOR(a, b)
23456 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
23457 let a = operands.remove(0);
23458 let b = operands.remove(0);
23459 return Ok(Expression::Function(Box::new(Function::new(
23460 "BOOLXOR".to_string(),
23461 vec![a, b],
23462 ))));
23463 }
23464
23465 // Helper to build (a AND NOT b) OR (NOT a AND b)
23466 let make_xor = |a: Expression, b: Expression| -> Expression {
23467 let not_b = Expression::Not(Box::new(
23468 crate::expressions::UnaryOp::new(b.clone()),
23469 ));
23470 let not_a = Expression::Not(Box::new(
23471 crate::expressions::UnaryOp::new(a.clone()),
23472 ));
23473 let left_and = Expression::And(Box::new(BinaryOp {
23474 left: a,
23475 right: Expression::Paren(Box::new(Paren {
23476 this: not_b,
23477 trailing_comments: Vec::new(),
23478 })),
23479 left_comments: Vec::new(),
23480 operator_comments: Vec::new(),
23481 trailing_comments: Vec::new(),
23482 inferred_type: None,
23483 }));
23484 let right_and = Expression::And(Box::new(BinaryOp {
23485 left: Expression::Paren(Box::new(Paren {
23486 this: not_a,
23487 trailing_comments: Vec::new(),
23488 })),
23489 right: b,
23490 left_comments: Vec::new(),
23491 operator_comments: Vec::new(),
23492 trailing_comments: Vec::new(),
23493 inferred_type: None,
23494 }));
23495 Expression::Or(Box::new(BinaryOp {
23496 left: Expression::Paren(Box::new(Paren {
23497 this: left_and,
23498 trailing_comments: Vec::new(),
23499 })),
23500 right: Expression::Paren(Box::new(Paren {
23501 this: right_and,
23502 trailing_comments: Vec::new(),
23503 })),
23504 left_comments: Vec::new(),
23505 operator_comments: Vec::new(),
23506 trailing_comments: Vec::new(),
23507 inferred_type: None,
23508 }))
23509 };
23510
23511 if operands.len() >= 2 {
23512 let mut result = make_xor(operands.remove(0), operands.remove(0));
23513 for operand in operands {
23514 result = make_xor(result, operand);
23515 }
23516 Ok(result)
23517 } else if operands.len() == 1 {
23518 Ok(operands.remove(0))
23519 } else {
23520 // No operands - return FALSE (shouldn't happen)
23521 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
23522 value: false,
23523 }))
23524 }
23525 } else {
23526 Ok(e)
23527 }
23528 }
23529 Action::DatePartUnquote => {
23530 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
23531 // Convert the quoted string first arg to a bare Column/Identifier
23532 if let Expression::Function(mut f) = e {
23533 if let Some(Expression::Literal(lit)) = f.args.first() {
23534 if let crate::expressions::Literal::String(s) = lit.as_ref() {
23535 let bare_name = s.to_ascii_lowercase();
23536 f.args[0] =
23537 Expression::Column(Box::new(crate::expressions::Column {
23538 name: Identifier::new(bare_name),
23539 table: None,
23540 join_mark: false,
23541 trailing_comments: Vec::new(),
23542 span: None,
23543 inferred_type: None,
23544 }));
23545 }
23546 }
23547 Ok(Expression::Function(f))
23548 } else {
23549 Ok(e)
23550 }
23551 }
23552 Action::ArrayLengthConvert => {
23553 // Extract the argument from the expression
23554 let arg = match e {
23555 Expression::Cardinality(ref f) => f.this.clone(),
23556 Expression::ArrayLength(ref f) => f.this.clone(),
23557 Expression::ArraySize(ref f) => f.this.clone(),
23558 _ => return Ok(e),
23559 };
23560 match target {
23561 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23562 Ok(Expression::Function(Box::new(Function::new(
23563 "SIZE".to_string(),
23564 vec![arg],
23565 ))))
23566 }
23567 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23568 Ok(Expression::Cardinality(Box::new(
23569 crate::expressions::UnaryFunc::new(arg),
23570 )))
23571 }
23572 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
23573 crate::expressions::UnaryFunc::new(arg),
23574 ))),
23575 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
23576 crate::expressions::UnaryFunc::new(arg),
23577 ))),
23578 DialectType::PostgreSQL | DialectType::Redshift => {
23579 // PostgreSQL ARRAY_LENGTH requires dimension arg
23580 Ok(Expression::Function(Box::new(Function::new(
23581 "ARRAY_LENGTH".to_string(),
23582 vec![arg, Expression::number(1)],
23583 ))))
23584 }
23585 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
23586 crate::expressions::UnaryFunc::new(arg),
23587 ))),
23588 _ => Ok(e), // Keep original
23589 }
23590 }
23591
23592 Action::JsonExtractToArrow => {
23593 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
23594 if let Expression::JsonExtract(mut f) = e {
23595 f.arrow_syntax = true;
23596 // Transform path: convert bracket notation to dot notation
23597 // SQLite strips wildcards, DuckDB preserves them
23598 if let Expression::Literal(ref lit) = f.path {
23599 if let Literal::String(ref s) = lit.as_ref() {
23600 let mut transformed = s.clone();
23601 if matches!(target, DialectType::SQLite) {
23602 transformed = Self::strip_json_wildcards(&transformed);
23603 }
23604 transformed = Self::bracket_to_dot_notation(&transformed);
23605 if transformed != *s {
23606 f.path = Expression::string(&transformed);
23607 }
23608 }
23609 }
23610 Ok(Expression::JsonExtract(f))
23611 } else {
23612 Ok(e)
23613 }
23614 }
23615
23616 Action::JsonExtractToGetJsonObject => {
23617 if let Expression::JsonExtract(f) = e {
23618 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
23619 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
23620 // Use proper decomposition that handles brackets
23621 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
23622 if let Literal::String(ref s) = lit.as_ref() {
23623 let parts = Self::decompose_json_path(s);
23624 parts.into_iter().map(|k| Expression::string(&k)).collect()
23625 } else {
23626 vec![]
23627 }
23628 } else {
23629 vec![f.path]
23630 };
23631 let func_name = if matches!(target, DialectType::Redshift) {
23632 "JSON_EXTRACT_PATH_TEXT"
23633 } else {
23634 "JSON_EXTRACT_PATH"
23635 };
23636 let mut args = vec![f.this];
23637 args.extend(keys);
23638 Ok(Expression::Function(Box::new(Function::new(
23639 func_name.to_string(),
23640 args,
23641 ))))
23642 } else {
23643 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23644 // Convert bracket double quotes to single quotes
23645 let path = if let Expression::Literal(ref lit) = f.path {
23646 if let Literal::String(ref s) = lit.as_ref() {
23647 let normalized = Self::bracket_to_single_quotes(s);
23648 if normalized != *s {
23649 Expression::string(&normalized)
23650 } else {
23651 f.path.clone()
23652 }
23653 } else {
23654 f.path.clone()
23655 }
23656 } else {
23657 f.path.clone()
23658 };
23659 Ok(Expression::Function(Box::new(Function::new(
23660 "GET_JSON_OBJECT".to_string(),
23661 vec![f.this, path],
23662 ))))
23663 }
23664 } else {
23665 Ok(e)
23666 }
23667 }
23668
23669 Action::JsonExtractScalarToGetJsonObject => {
23670 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23671 if let Expression::JsonExtractScalar(f) = e {
23672 Ok(Expression::Function(Box::new(Function::new(
23673 "GET_JSON_OBJECT".to_string(),
23674 vec![f.this, f.path],
23675 ))))
23676 } else {
23677 Ok(e)
23678 }
23679 }
23680
23681 Action::JsonExtractToTsql => {
23682 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
23683 let (this, path) = match e {
23684 Expression::JsonExtract(f) => (f.this, f.path),
23685 Expression::JsonExtractScalar(f) => (f.this, f.path),
23686 _ => return Ok(e),
23687 };
23688 // Transform path: strip wildcards, convert bracket notation to dot notation
23689 let transformed_path = if let Expression::Literal(ref lit) = path {
23690 if let Literal::String(ref s) = lit.as_ref() {
23691 let stripped = Self::strip_json_wildcards(s);
23692 let dotted = Self::bracket_to_dot_notation(&stripped);
23693 Expression::string(&dotted)
23694 } else {
23695 path.clone()
23696 }
23697 } else {
23698 path
23699 };
23700 let json_query = Expression::Function(Box::new(Function::new(
23701 "JSON_QUERY".to_string(),
23702 vec![this.clone(), transformed_path.clone()],
23703 )));
23704 let json_value = Expression::Function(Box::new(Function::new(
23705 "JSON_VALUE".to_string(),
23706 vec![this, transformed_path],
23707 )));
23708 Ok(Expression::Function(Box::new(Function::new(
23709 "ISNULL".to_string(),
23710 vec![json_query, json_value],
23711 ))))
23712 }
23713
23714 Action::JsonExtractToClickHouse => {
23715 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
23716 let (this, path) = match e {
23717 Expression::JsonExtract(f) => (f.this, f.path),
23718 Expression::JsonExtractScalar(f) => (f.this, f.path),
23719 _ => return Ok(e),
23720 };
23721 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
23722 if let Literal::String(ref s) = lit.as_ref() {
23723 let parts = Self::decompose_json_path(s);
23724 let mut result = vec![this];
23725 for part in parts {
23726 // ClickHouse uses 1-based integer indices for array access
23727 if let Ok(idx) = part.parse::<i64>() {
23728 result.push(Expression::number(idx + 1));
23729 } else {
23730 result.push(Expression::string(&part));
23731 }
23732 }
23733 result
23734 } else {
23735 vec![]
23736 }
23737 } else {
23738 vec![this, path]
23739 };
23740 Ok(Expression::Function(Box::new(Function::new(
23741 "JSONExtractString".to_string(),
23742 args,
23743 ))))
23744 }
23745
23746 Action::JsonExtractScalarConvert => {
23747 // JSON_EXTRACT_SCALAR -> target-specific
23748 if let Expression::JsonExtractScalar(f) = e {
23749 match target {
23750 DialectType::PostgreSQL | DialectType::Redshift => {
23751 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
23752 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
23753 {
23754 if let Literal::String(ref s) = lit.as_ref() {
23755 let parts = Self::decompose_json_path(s);
23756 parts.into_iter().map(|k| Expression::string(&k)).collect()
23757 } else {
23758 vec![]
23759 }
23760 } else {
23761 vec![f.path]
23762 };
23763 let mut args = vec![f.this];
23764 args.extend(keys);
23765 Ok(Expression::Function(Box::new(Function::new(
23766 "JSON_EXTRACT_PATH_TEXT".to_string(),
23767 args,
23768 ))))
23769 }
23770 DialectType::Snowflake => {
23771 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
23772 let stripped_path = if let Expression::Literal(ref lit) = f.path {
23773 if let Literal::String(ref s) = lit.as_ref() {
23774 let stripped = Self::strip_json_dollar_prefix(s);
23775 Expression::string(&stripped)
23776 } else {
23777 f.path.clone()
23778 }
23779 } else {
23780 f.path
23781 };
23782 Ok(Expression::Function(Box::new(Function::new(
23783 "JSON_EXTRACT_PATH_TEXT".to_string(),
23784 vec![f.this, stripped_path],
23785 ))))
23786 }
23787 DialectType::SQLite | DialectType::DuckDB => {
23788 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
23789 Ok(Expression::JsonExtractScalar(Box::new(
23790 crate::expressions::JsonExtractFunc {
23791 this: f.this,
23792 path: f.path,
23793 returning: f.returning,
23794 arrow_syntax: true,
23795 hash_arrow_syntax: false,
23796 wrapper_option: None,
23797 quotes_option: None,
23798 on_scalar_string: false,
23799 on_error: None,
23800 },
23801 )))
23802 }
23803 _ => Ok(Expression::JsonExtractScalar(f)),
23804 }
23805 } else {
23806 Ok(e)
23807 }
23808 }
23809
23810 Action::JsonPathNormalize => {
23811 // Normalize JSON path format for BigQuery, MySQL, etc.
23812 if let Expression::JsonExtract(mut f) = e {
23813 if let Expression::Literal(ref lit) = f.path {
23814 if let Literal::String(ref s) = lit.as_ref() {
23815 let mut normalized = s.clone();
23816 // Convert bracket notation and handle wildcards per dialect
23817 match target {
23818 DialectType::BigQuery => {
23819 // BigQuery strips wildcards and uses single quotes in brackets
23820 normalized = Self::strip_json_wildcards(&normalized);
23821 normalized = Self::bracket_to_single_quotes(&normalized);
23822 }
23823 DialectType::MySQL => {
23824 // MySQL preserves wildcards, converts brackets to dot notation
23825 normalized = Self::bracket_to_dot_notation(&normalized);
23826 }
23827 _ => {}
23828 }
23829 if normalized != *s {
23830 f.path = Expression::string(&normalized);
23831 }
23832 }
23833 }
23834 Ok(Expression::JsonExtract(f))
23835 } else {
23836 Ok(e)
23837 }
23838 }
23839
23840 Action::JsonQueryValueConvert => {
23841 // JsonQuery/JsonValue -> target-specific
23842 let (f, is_query) = match e {
23843 Expression::JsonQuery(f) => (f, true),
23844 Expression::JsonValue(f) => (f, false),
23845 _ => return Ok(e),
23846 };
23847 match target {
23848 DialectType::TSQL | DialectType::Fabric => {
23849 // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
23850 let json_query = Expression::Function(Box::new(Function::new(
23851 "JSON_QUERY".to_string(),
23852 vec![f.this.clone(), f.path.clone()],
23853 )));
23854 let json_value = Expression::Function(Box::new(Function::new(
23855 "JSON_VALUE".to_string(),
23856 vec![f.this, f.path],
23857 )));
23858 Ok(Expression::Function(Box::new(Function::new(
23859 "ISNULL".to_string(),
23860 vec![json_query, json_value],
23861 ))))
23862 }
23863 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23864 Ok(Expression::Function(Box::new(Function::new(
23865 "GET_JSON_OBJECT".to_string(),
23866 vec![f.this, f.path],
23867 ))))
23868 }
23869 DialectType::PostgreSQL | DialectType::Redshift => {
23870 Ok(Expression::Function(Box::new(Function::new(
23871 "JSON_EXTRACT_PATH_TEXT".to_string(),
23872 vec![f.this, f.path],
23873 ))))
23874 }
23875 DialectType::DuckDB | DialectType::SQLite => {
23876 // json -> path arrow syntax
23877 Ok(Expression::JsonExtract(Box::new(
23878 crate::expressions::JsonExtractFunc {
23879 this: f.this,
23880 path: f.path,
23881 returning: f.returning,
23882 arrow_syntax: true,
23883 hash_arrow_syntax: false,
23884 wrapper_option: f.wrapper_option,
23885 quotes_option: f.quotes_option,
23886 on_scalar_string: f.on_scalar_string,
23887 on_error: f.on_error,
23888 },
23889 )))
23890 }
23891 DialectType::Snowflake => {
23892 // GET_PATH(PARSE_JSON(json), 'path')
23893 // Strip $. prefix from path
23894 // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
23895 let json_expr = match &f.this {
23896 Expression::Function(ref inner_f)
23897 if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
23898 {
23899 f.this
23900 }
23901 Expression::ParseJson(_) => {
23902 // Already a ParseJson expression, which generates as PARSE_JSON(...)
23903 f.this
23904 }
23905 _ => Expression::Function(Box::new(Function::new(
23906 "PARSE_JSON".to_string(),
23907 vec![f.this],
23908 ))),
23909 };
23910 let path_str = match &f.path {
23911 Expression::Literal(lit)
23912 if matches!(lit.as_ref(), Literal::String(_)) =>
23913 {
23914 let Literal::String(s) = lit.as_ref() else {
23915 unreachable!()
23916 };
23917 let stripped = s.strip_prefix("$.").unwrap_or(s);
23918 Expression::Literal(Box::new(Literal::String(
23919 stripped.to_string(),
23920 )))
23921 }
23922 other => other.clone(),
23923 };
23924 Ok(Expression::Function(Box::new(Function::new(
23925 "GET_PATH".to_string(),
23926 vec![json_expr, path_str],
23927 ))))
23928 }
23929 _ => {
23930 // Default: keep as JSON_QUERY/JSON_VALUE function
23931 let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
23932 Ok(Expression::Function(Box::new(Function::new(
23933 func_name.to_string(),
23934 vec![f.this, f.path],
23935 ))))
23936 }
23937 }
23938 }
23939
23940 Action::JsonLiteralToJsonParse => {
23941 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
23942 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
23943 if let Expression::Cast(c) = e {
23944 let func_name = if matches!(target, DialectType::Snowflake) {
23945 "PARSE_JSON"
23946 } else {
23947 "JSON_PARSE"
23948 };
23949 Ok(Expression::Function(Box::new(Function::new(
23950 func_name.to_string(),
23951 vec![c.this],
23952 ))))
23953 } else {
23954 Ok(e)
23955 }
23956 }
23957
23958 Action::DuckDBCastJsonToVariant => {
23959 if let Expression::Cast(c) = e {
23960 Ok(Expression::Cast(Box::new(Cast {
23961 this: c.this,
23962 to: DataType::Custom {
23963 name: "VARIANT".to_string(),
23964 },
23965 trailing_comments: c.trailing_comments,
23966 double_colon_syntax: false,
23967 format: None,
23968 default: None,
23969 inferred_type: None,
23970 })))
23971 } else {
23972 Ok(e)
23973 }
23974 }
23975
23976 Action::DuckDBTryCastJsonToTryJsonParse => {
23977 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
23978 if let Expression::TryCast(c) = e {
23979 let json_parse = Expression::Function(Box::new(Function::new(
23980 "JSON_PARSE".to_string(),
23981 vec![c.this],
23982 )));
23983 Ok(Expression::Function(Box::new(Function::new(
23984 "TRY".to_string(),
23985 vec![json_parse],
23986 ))))
23987 } else {
23988 Ok(e)
23989 }
23990 }
23991
23992 Action::DuckDBJsonFuncToJsonParse => {
23993 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
23994 if let Expression::Function(f) = e {
23995 let args = f.args;
23996 Ok(Expression::Function(Box::new(Function::new(
23997 "JSON_PARSE".to_string(),
23998 args,
23999 ))))
24000 } else {
24001 Ok(e)
24002 }
24003 }
24004
24005 Action::DuckDBJsonValidToIsJson => {
24006 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
24007 if let Expression::Function(mut f) = e {
24008 let arg = f.args.remove(0);
24009 Ok(Expression::IsJson(Box::new(crate::expressions::IsJson {
24010 this: arg,
24011 json_type: None,
24012 unique_keys: None,
24013 negated: false,
24014 })))
24015 } else {
24016 Ok(e)
24017 }
24018 }
24019
24020 Action::AtTimeZoneConvert => {
24021 // AT TIME ZONE -> target-specific conversion
24022 if let Expression::AtTimeZone(atz) = e {
24023 match target {
24024 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24025 Ok(Expression::Function(Box::new(Function::new(
24026 "AT_TIMEZONE".to_string(),
24027 vec![atz.this, atz.zone],
24028 ))))
24029 }
24030 DialectType::Spark | DialectType::Databricks => {
24031 Ok(Expression::Function(Box::new(Function::new(
24032 "FROM_UTC_TIMESTAMP".to_string(),
24033 vec![atz.this, atz.zone],
24034 ))))
24035 }
24036 DialectType::Snowflake => {
24037 // CONVERT_TIMEZONE('zone', expr)
24038 Ok(Expression::Function(Box::new(Function::new(
24039 "CONVERT_TIMEZONE".to_string(),
24040 vec![atz.zone, atz.this],
24041 ))))
24042 }
24043 DialectType::BigQuery => {
24044 // TIMESTAMP(DATETIME(expr, 'zone'))
24045 let datetime_call = Expression::Function(Box::new(Function::new(
24046 "DATETIME".to_string(),
24047 vec![atz.this, atz.zone],
24048 )));
24049 Ok(Expression::Function(Box::new(Function::new(
24050 "TIMESTAMP".to_string(),
24051 vec![datetime_call],
24052 ))))
24053 }
24054 _ => Ok(Expression::Function(Box::new(Function::new(
24055 "AT_TIMEZONE".to_string(),
24056 vec![atz.this, atz.zone],
24057 )))),
24058 }
24059 } else {
24060 Ok(e)
24061 }
24062 }
24063
                Action::DayOfWeekConvert => {
                    // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                    // Other targets get the DayOfWeek node back unchanged, and expressions that
                    // are not DayOfWeek pass through.
                    if let Expression::DayOfWeek(f) = e {
                        match target {
                            DialectType::DuckDB => Ok(Expression::Function(Box::new(
                                Function::new("ISODOW".to_string(), vec![f.this]),
                            ))),
                            DialectType::Spark | DialectType::Databricks => {
                                // ((DAYOFWEEK(x) % 7) + 1)
                                // NOTE(review): presumably this re-bases Spark's day numbering
                                // onto the source's — confirm against both dialects' docs.
                                let dayofweek = Expression::Function(Box::new(Function::new(
                                    "DAYOFWEEK".to_string(),
                                    vec![f.this],
                                )));
                                let modulo = Expression::Mod(Box::new(BinaryOp {
                                    left: dayofweek,
                                    right: Expression::number(7),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                // Explicit parentheses around the modulo so it is grouped
                                // before the addition.
                                let paren_mod = Expression::Paren(Box::new(Paren {
                                    this: modulo,
                                    trailing_comments: Vec::new(),
                                }));
                                let add_one = Expression::Add(Box::new(BinaryOp {
                                    left: paren_mod,
                                    right: Expression::number(1),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                // The whole sum is parenthesized as well.
                                Ok(Expression::Paren(Box::new(Paren {
                                    this: add_one,
                                    trailing_comments: Vec::new(),
                                })))
                            }
                            _ => Ok(Expression::DayOfWeek(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
24108
24109 Action::MaxByMinByConvert => {
24110 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
24111 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
24112 // Handle both Expression::Function and Expression::AggregateFunction
24113 let (is_max, args) = match &e {
24114 Expression::Function(f) => {
24115 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
24116 }
24117 Expression::AggregateFunction(af) => {
24118 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
24119 }
24120 _ => return Ok(e),
24121 };
24122 match target {
24123 DialectType::ClickHouse => {
24124 let name = if is_max { "argMax" } else { "argMin" };
24125 let mut args = args;
24126 args.truncate(2);
24127 Ok(Expression::Function(Box::new(Function::new(
24128 name.to_string(),
24129 args,
24130 ))))
24131 }
24132 DialectType::DuckDB => {
24133 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
24134 Ok(Expression::Function(Box::new(Function::new(
24135 name.to_string(),
24136 args,
24137 ))))
24138 }
24139 DialectType::Spark | DialectType::Databricks => {
24140 let mut args = args;
24141 args.truncate(2);
24142 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
24143 Ok(Expression::Function(Box::new(Function::new(
24144 name.to_string(),
24145 args,
24146 ))))
24147 }
24148 _ => Ok(e),
24149 }
24150 }
24151
24152 Action::ElementAtConvert => {
24153 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
24154 let (arr, idx) = if let Expression::ElementAt(bf) = e {
24155 (bf.this, bf.expression)
24156 } else if let Expression::Function(ref f) = e {
24157 if f.args.len() >= 2 {
24158 if let Expression::Function(f) = e {
24159 let mut args = f.args;
24160 let arr = args.remove(0);
24161 let idx = args.remove(0);
24162 (arr, idx)
24163 } else {
24164 unreachable!("outer condition already matched Expression::Function")
24165 }
24166 } else {
24167 return Ok(e);
24168 }
24169 } else {
24170 return Ok(e);
24171 };
24172 match target {
24173 DialectType::PostgreSQL => {
24174 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
24175 let arr_expr = Expression::Paren(Box::new(Paren {
24176 this: arr,
24177 trailing_comments: vec![],
24178 }));
24179 Ok(Expression::Subscript(Box::new(
24180 crate::expressions::Subscript {
24181 this: arr_expr,
24182 index: idx,
24183 },
24184 )))
24185 }
24186 DialectType::BigQuery => {
24187 // BigQuery: convert ARRAY[...] to bare [...] for subscript
24188 let arr_expr = match arr {
24189 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
24190 crate::expressions::ArrayConstructor {
24191 expressions: af.expressions,
24192 bracket_notation: true,
24193 use_list_keyword: false,
24194 },
24195 )),
24196 other => other,
24197 };
24198 let safe_ordinal = Expression::Function(Box::new(Function::new(
24199 "SAFE_ORDINAL".to_string(),
24200 vec![idx],
24201 )));
24202 Ok(Expression::Subscript(Box::new(
24203 crate::expressions::Subscript {
24204 this: arr_expr,
24205 index: safe_ordinal,
24206 },
24207 )))
24208 }
24209 _ => Ok(Expression::Function(Box::new(Function::new(
24210 "ELEMENT_AT".to_string(),
24211 vec![arr, idx],
24212 )))),
24213 }
24214 }
24215
24216 Action::CurrentUserParens => {
24217 // CURRENT_USER -> CURRENT_USER() for Snowflake
24218 Ok(Expression::Function(Box::new(Function::new(
24219 "CURRENT_USER".to_string(),
24220 vec![],
24221 ))))
24222 }
24223
24224 Action::ArrayAggToCollectList => {
24225 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
24226 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
24227 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
24228 match e {
24229 Expression::AggregateFunction(mut af) => {
24230 let is_simple =
24231 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
24232 let args = if af.args.is_empty() {
24233 vec![]
24234 } else {
24235 vec![af.args[0].clone()]
24236 };
24237 af.name = "COLLECT_LIST".to_string();
24238 af.args = args;
24239 if is_simple {
24240 af.order_by = Vec::new();
24241 }
24242 Ok(Expression::AggregateFunction(af))
24243 }
24244 Expression::ArrayAgg(agg) => {
24245 let is_simple =
24246 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
24247 Ok(Expression::AggregateFunction(Box::new(
24248 crate::expressions::AggregateFunction {
24249 name: "COLLECT_LIST".to_string(),
24250 args: vec![agg.this.clone()],
24251 distinct: agg.distinct,
24252 filter: agg.filter.clone(),
24253 order_by: if is_simple {
24254 Vec::new()
24255 } else {
24256 agg.order_by.clone()
24257 },
24258 limit: agg.limit.clone(),
24259 ignore_nulls: agg.ignore_nulls,
24260 inferred_type: None,
24261 },
24262 )))
24263 }
24264 _ => Ok(e),
24265 }
24266 }
24267
                Action::ArraySyntaxConvert => {
                    // Normalizes array constructors into an ArrayFunc node; the
                    // `bracket_notation` flag is what the rewrite controls.
                    match e {
                        // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                        // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                        // `use_list_keyword` is reset to false on the rebuilt node.
                        Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                            Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                                expressions: arr.expressions,
                                bracket_notation: true,
                                use_list_keyword: false,
                            })),
                        ),
                        // ARRAY(y) function style -> ArrayFunc for target dialect
                        // bracket_notation=true for BigQuery/DuckDB/Snowflake/ClickHouse/StarRocks
                        // (output []), false for every other target (e.g. Presto, output ARRAY[])
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                            let bracket = matches!(
                                target,
                                DialectType::BigQuery
                                    | DialectType::DuckDB
                                    | DialectType::Snowflake
                                    | DialectType::ClickHouse
                                    | DialectType::StarRocks
                            );
                            Ok(Expression::ArrayFunc(Box::new(
                                crate::expressions::ArrayConstructor {
                                    expressions: f.args,
                                    bracket_notation: bracket,
                                    use_list_keyword: false,
                                },
                            )))
                        }
                        // Already bracket-notation arrays and all other expressions pass through.
                        _ => Ok(e),
                    }
                }
24301
24302 Action::CastToJsonForSpark => {
24303 // CAST(x AS JSON) -> TO_JSON(x) for Spark
24304 if let Expression::Cast(c) = e {
24305 Ok(Expression::Function(Box::new(Function::new(
24306 "TO_JSON".to_string(),
24307 vec![c.this],
24308 ))))
24309 } else {
24310 Ok(e)
24311 }
24312 }
24313
24314 Action::CastJsonToFromJson => {
24315 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
24316 if let Expression::Cast(c) = e {
24317 // Extract the string literal from ParseJson
24318 let literal_expr = if let Expression::ParseJson(pj) = c.this {
24319 pj.this
24320 } else {
24321 c.this
24322 };
24323 // Convert the target DataType to Spark's type string format
24324 let type_str = Self::data_type_to_spark_string(&c.to);
24325 Ok(Expression::Function(Box::new(Function::new(
24326 "FROM_JSON".to_string(),
24327 vec![
24328 literal_expr,
24329 Expression::Literal(Box::new(Literal::String(type_str))),
24330 ],
24331 ))))
24332 } else {
24333 Ok(e)
24334 }
24335 }
24336
                Action::ToJsonConvert => {
                    // TO_JSON(x) -> target-specific conversion
                    // Non-ToJson expressions are returned unchanged.
                    if let Expression::ToJson(f) = e {
                        let arg = f.this;
                        match target {
                            DialectType::Presto | DialectType::Trino => {
                                // JSON_FORMAT(CAST(x AS JSON))
                                let cast_json = Expression::Cast(Box::new(Cast {
                                    this: arg,
                                    to: DataType::Custom {
                                        name: "JSON".to_string(),
                                    },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "JSON_FORMAT".to_string(),
                                    vec![cast_json],
                                ))))
                            }
                            // BigQuery: TO_JSON_STRING(x)
                            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                                Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                            ))),
                            DialectType::DuckDB => {
                                // CAST(TO_JSON(x) AS TEXT)
                                let to_json =
                                    Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                        this: arg,
                                        original_name: None,
                                        inferred_type: None,
                                    }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: to_json,
                                    to: DataType::Text,
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            // Other targets keep TO_JSON; the node is rebuilt, so
                            // `original_name` and `inferred_type` are reset to None.
                            _ => Ok(Expression::ToJson(Box::new(
                                crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                    inferred_type: None,
                                },
                            ))),
                        }
                    } else {
                        Ok(e)
                    }
                }
24393
24394 Action::VarianceToClickHouse => {
24395 if let Expression::Variance(f) = e {
24396 Ok(Expression::Function(Box::new(Function::new(
24397 "varSamp".to_string(),
24398 vec![f.this],
24399 ))))
24400 } else {
24401 Ok(e)
24402 }
24403 }
24404
24405 Action::StddevToClickHouse => {
24406 if let Expression::Stddev(f) = e {
24407 Ok(Expression::Function(Box::new(Function::new(
24408 "stddevSamp".to_string(),
24409 vec![f.this],
24410 ))))
24411 } else {
24412 Ok(e)
24413 }
24414 }
24415
24416 Action::ApproxQuantileConvert => {
24417 if let Expression::ApproxQuantile(aq) = e {
24418 let mut args = vec![*aq.this];
24419 if let Some(q) = aq.quantile {
24420 args.push(*q);
24421 }
24422 Ok(Expression::Function(Box::new(Function::new(
24423 "APPROX_PERCENTILE".to_string(),
24424 args,
24425 ))))
24426 } else {
24427 Ok(e)
24428 }
24429 }
24430
24431 Action::DollarParamConvert => {
24432 if let Expression::Parameter(p) = e {
24433 Ok(Expression::Parameter(Box::new(
24434 crate::expressions::Parameter {
24435 name: p.name,
24436 index: p.index,
24437 style: crate::expressions::ParameterStyle::At,
24438 quoted: p.quoted,
24439 string_quoted: p.string_quoted,
24440 expression: p.expression,
24441 },
24442 )))
24443 } else {
24444 Ok(e)
24445 }
24446 }
24447
24448 Action::EscapeStringNormalize => {
24449 if let Expression::Literal(ref lit) = e {
24450 if let Literal::EscapeString(s) = lit.as_ref() {
24451 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
24452 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
24453 s[2..].to_string()
24454 } else {
24455 s.clone()
24456 };
24457 let normalized = stripped
24458 .replace('\n', "\\n")
24459 .replace('\r', "\\r")
24460 .replace('\t', "\\t");
24461 match target {
24462 DialectType::BigQuery => {
24463 // BigQuery: e'...' -> CAST(b'...' AS STRING)
24464 // Use Raw for the b'...' part to avoid double-escaping
24465 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
24466 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
24467 }
24468 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
24469 normalized,
24470 )))),
24471 }
24472 } else {
24473 Ok(e)
24474 }
24475 } else {
24476 Ok(e)
24477 }
24478 }
24479
24480 Action::StraightJoinCase => {
24481 // straight_join: keep lowercase for DuckDB, quote for MySQL
24482 if let Expression::Column(col) = e {
24483 if col.name.name == "STRAIGHT_JOIN" {
24484 let mut new_col = col;
24485 new_col.name.name = "straight_join".to_string();
24486 if matches!(target, DialectType::MySQL) {
24487 // MySQL: needs quoting since it's a reserved keyword
24488 new_col.name.quoted = true;
24489 }
24490 Ok(Expression::Column(new_col))
24491 } else {
24492 Ok(Expression::Column(col))
24493 }
24494 } else {
24495 Ok(e)
24496 }
24497 }
24498
24499 Action::TablesampleReservoir => {
24500 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
24501 if let Expression::TableSample(mut ts) = e {
24502 if let Some(ref mut sample) = ts.sample {
24503 sample.method = crate::expressions::SampleMethod::Reservoir;
24504 sample.explicit_method = true;
24505 }
24506 Ok(Expression::TableSample(ts))
24507 } else {
24508 Ok(e)
24509 }
24510 }
24511
                Action::TablesampleSnowflakeStrip => {
                    // Strip method and PERCENT for Snowflake target from non-Snowflake source
                    // The same three flags are cleared whether the sample hangs off a
                    // standalone TableSample node or off a Table's `table_sample`.
                    match e {
                        Expression::TableSample(mut ts) => {
                            if let Some(ref mut sample) = ts.sample {
                                sample.suppress_method_output = true;
                                sample.unit_after_size = false;
                                sample.is_percent = false;
                            }
                            Ok(Expression::TableSample(ts))
                        }
                        Expression::Table(mut t) => {
                            // A table without a sample is returned as-is.
                            if let Some(ref mut sample) = t.table_sample {
                                sample.suppress_method_output = true;
                                sample.unit_after_size = false;
                                sample.is_percent = false;
                            }
                            Ok(Expression::Table(t))
                        }
                        _ => Ok(e),
                    }
                }
24534
24535 Action::FirstToAnyValue => {
24536 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
24537 if let Expression::First(mut agg) = e {
24538 agg.ignore_nulls = None;
24539 agg.name = Some("ANY_VALUE".to_string());
24540 Ok(Expression::AnyValue(agg))
24541 } else {
24542 Ok(e)
24543 }
24544 }
24545
24546 Action::ArrayIndexConvert => {
24547 // Subscript index: 1-based to 0-based for BigQuery
24548 if let Expression::Subscript(mut sub) = e {
24549 if let Expression::Literal(ref lit) = sub.index {
24550 if let Literal::Number(ref n) = lit.as_ref() {
24551 if let Ok(val) = n.parse::<i64>() {
24552 sub.index = Expression::Literal(Box::new(Literal::Number(
24553 (val - 1).to_string(),
24554 )));
24555 }
24556 }
24557 }
24558 Ok(Expression::Subscript(sub))
24559 } else {
24560 Ok(e)
24561 }
24562 }
24563
24564 Action::AnyValueIgnoreNulls => {
24565 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
24566 if let Expression::AnyValue(mut av) = e {
24567 if av.ignore_nulls.is_none() {
24568 av.ignore_nulls = Some(true);
24569 }
24570 Ok(Expression::AnyValue(av))
24571 } else {
24572 Ok(e)
24573 }
24574 }
24575
24576 Action::BigQueryNullsOrdering => {
24577 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
24578 if let Expression::WindowFunction(mut wf) = e {
24579 for o in &mut wf.over.order_by {
24580 o.nulls_first = None;
24581 }
24582 Ok(Expression::WindowFunction(wf))
24583 } else if let Expression::Ordered(mut o) = e {
24584 o.nulls_first = None;
24585 Ok(Expression::Ordered(o))
24586 } else {
24587 Ok(e)
24588 }
24589 }
24590
24591 Action::SnowflakeFloatProtect => {
24592 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
24593 // Snowflake's target transform from converting it to DOUBLE.
24594 // Non-Snowflake sources should keep their FLOAT spelling.
24595 if let Expression::DataType(DataType::Float { .. }) = e {
24596 Ok(Expression::DataType(DataType::Custom {
24597 name: "FLOAT".to_string(),
24598 }))
24599 } else {
24600 Ok(e)
24601 }
24602 }
24603
24604 Action::MysqlNullsOrdering => {
24605 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
24606 if let Expression::Ordered(mut o) = e {
24607 let nulls_last = o.nulls_first == Some(false);
24608 let desc = o.desc;
24609 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
24610 // If requested ordering matches default, just strip NULLS clause
24611 let matches_default = if desc {
24612 // DESC default is NULLS FIRST, so nulls_first=true matches
24613 o.nulls_first == Some(true)
24614 } else {
24615 // ASC default is NULLS LAST, so nulls_first=false matches
24616 nulls_last
24617 };
24618 if matches_default {
24619 o.nulls_first = None;
24620 Ok(Expression::Ordered(o))
24621 } else {
24622 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
24623 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
24624 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
24625 let null_val = if desc { 1 } else { 0 };
24626 let non_null_val = if desc { 0 } else { 1 };
24627 let _case_expr = Expression::Case(Box::new(Case {
24628 operand: None,
24629 whens: vec![(
24630 Expression::IsNull(Box::new(crate::expressions::IsNull {
24631 this: o.this.clone(),
24632 not: false,
24633 postfix_form: false,
24634 })),
24635 Expression::number(null_val),
24636 )],
24637 else_: Some(Expression::number(non_null_val)),
24638 comments: Vec::new(),
24639 inferred_type: None,
24640 }));
24641 o.nulls_first = None;
24642 // Return a tuple of [case_expr, ordered_expr]
24643 // We need to return both as part of the ORDER BY
24644 // But since transform_recursive processes individual expressions,
24645 // we can't easily add extra ORDER BY items here.
24646 // Instead, strip the nulls_first
24647 o.nulls_first = None;
24648 Ok(Expression::Ordered(o))
24649 }
24650 } else {
24651 Ok(e)
24652 }
24653 }
24654
24655 Action::MysqlNullsLastRewrite => {
24656 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
24657 // to simulate NULLS LAST for ASC ordering
24658 if let Expression::WindowFunction(mut wf) = e {
24659 let mut new_order_by = Vec::new();
24660 for o in wf.over.order_by {
24661 if !o.desc {
24662 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
24663 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
24664 let case_expr = Expression::Case(Box::new(Case {
24665 operand: None,
24666 whens: vec![(
24667 Expression::IsNull(Box::new(crate::expressions::IsNull {
24668 this: o.this.clone(),
24669 not: false,
24670 postfix_form: false,
24671 })),
24672 Expression::Literal(Box::new(Literal::Number(
24673 "1".to_string(),
24674 ))),
24675 )],
24676 else_: Some(Expression::Literal(Box::new(Literal::Number(
24677 "0".to_string(),
24678 )))),
24679 comments: Vec::new(),
24680 inferred_type: None,
24681 }));
24682 new_order_by.push(crate::expressions::Ordered {
24683 this: case_expr,
24684 desc: false,
24685 nulls_first: None,
24686 explicit_asc: false,
24687 with_fill: None,
24688 });
24689 let mut ordered = o;
24690 ordered.nulls_first = None;
24691 new_order_by.push(ordered);
24692 } else {
24693 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
24694 // No change needed
24695 let mut ordered = o;
24696 ordered.nulls_first = None;
24697 new_order_by.push(ordered);
24698 }
24699 }
24700 wf.over.order_by = new_order_by;
24701 Ok(Expression::WindowFunction(wf))
24702 } else {
24703 Ok(e)
24704 }
24705 }
24706
                Action::RespectNullsConvert => {
                    // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
                    // Only an explicit RESPECT NULLS (`ignore_nulls == Some(false)`) on
                    // FIRST_VALUE / LAST_VALUE is cleared; other window functions and other
                    // `ignore_nulls` states are left alone.
                    if let Expression::WindowFunction(mut wf) = e {
                        match &mut wf.this {
                            Expression::FirstValue(ref mut vf) => {
                                if vf.ignore_nulls == Some(false) {
                                    vf.ignore_nulls = None;
                                    // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                                    // but that's handled by the generator's NULLS ordering
                                }
                            }
                            Expression::LastValue(ref mut vf) => {
                                if vf.ignore_nulls == Some(false) {
                                    vf.ignore_nulls = None;
                                }
                            }
                            _ => {}
                        }
                        Ok(Expression::WindowFunction(wf))
                    } else {
                        Ok(e)
                    }
                }
24730
24731 Action::SnowflakeWindowFrameStrip => {
24732 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
24733 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
24734 if let Expression::WindowFunction(mut wf) = e {
24735 wf.over.frame = None;
24736 Ok(Expression::WindowFunction(wf))
24737 } else {
24738 Ok(e)
24739 }
24740 }
24741
24742 Action::SnowflakeWindowFrameAdd => {
24743 // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
24744 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
24745 if let Expression::WindowFunction(mut wf) = e {
24746 wf.over.frame = Some(crate::expressions::WindowFrame {
24747 kind: crate::expressions::WindowFrameKind::Rows,
24748 start: crate::expressions::WindowFrameBound::UnboundedPreceding,
24749 end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
24750 exclude: None,
24751 kind_text: None,
24752 start_side_text: None,
24753 end_side_text: None,
24754 });
24755 Ok(Expression::WindowFunction(wf))
24756 } else {
24757 Ok(e)
24758 }
24759 }
24760
24761 Action::CreateTableStripComment => {
24762 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
24763 if let Expression::CreateTable(mut ct) = e {
24764 for col in &mut ct.columns {
24765 col.comment = None;
24766 col.constraints.retain(|c| {
24767 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
24768 });
24769 // Also remove Comment from constraint_order
24770 col.constraint_order.retain(|c| {
24771 !matches!(c, crate::expressions::ConstraintType::Comment)
24772 });
24773 }
24774 // Strip properties (USING, PARTITIONED BY, etc.)
24775 ct.properties.clear();
24776 Ok(Expression::CreateTable(ct))
24777 } else {
24778 Ok(e)
24779 }
24780 }
24781
24782 Action::AlterTableToSpRename => {
24783 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
24784 if let Expression::AlterTable(ref at) = e {
24785 if let Some(crate::expressions::AlterTableAction::RenameTable(
24786 ref new_tbl,
24787 )) = at.actions.first()
24788 {
24789 // Build the old table name using TSQL bracket quoting
24790 let old_name = if let Some(ref schema) = at.name.schema {
24791 if at.name.name.quoted || schema.quoted {
24792 format!("[{}].[{}]", schema.name, at.name.name.name)
24793 } else {
24794 format!("{}.{}", schema.name, at.name.name.name)
24795 }
24796 } else {
24797 if at.name.name.quoted {
24798 format!("[{}]", at.name.name.name)
24799 } else {
24800 at.name.name.name.clone()
24801 }
24802 };
24803 let new_name = new_tbl.name.name.clone();
24804 // EXEC sp_rename 'old_name', 'new_name'
24805 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
24806 Ok(Expression::Raw(crate::expressions::Raw { sql }))
24807 } else {
24808 Ok(e)
24809 }
24810 } else {
24811 Ok(e)
24812 }
24813 }
24814
                Action::SnowflakeIntervalFormat => {
                    // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
                    // Applies only when the interval value is a string literal and the unit
                    // is a simple unit; otherwise the interval is returned untouched.
                    if let Expression::Interval(mut iv) = e {
                        if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
                            (&iv.this, &iv.unit)
                        {
                            if let Literal::String(ref val) = lit.as_ref() {
                                // An empty string marks "no simple unit name available".
                                let unit_str = match unit_spec {
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit, ..
                                    } => match unit {
                                        crate::expressions::IntervalUnit::Year => "YEAR",
                                        crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                        crate::expressions::IntervalUnit::Month => "MONTH",
                                        crate::expressions::IntervalUnit::Week => "WEEK",
                                        crate::expressions::IntervalUnit::Day => "DAY",
                                        crate::expressions::IntervalUnit::Hour => "HOUR",
                                        crate::expressions::IntervalUnit::Minute => "MINUTE",
                                        crate::expressions::IntervalUnit::Second => "SECOND",
                                        crate::expressions::IntervalUnit::Millisecond => {
                                            "MILLISECOND"
                                        }
                                        crate::expressions::IntervalUnit::Microsecond => {
                                            "MICROSECOND"
                                        }
                                        crate::expressions::IntervalUnit::Nanosecond => {
                                            "NANOSECOND"
                                        }
                                    },
                                    _ => "",
                                };
                                if !unit_str.is_empty() {
                                    // Fold the unit into the literal and clear the separate
                                    // unit field.
                                    let combined = format!("{} {}", val, unit_str);
                                    iv.this = Some(Expression::Literal(Box::new(Literal::String(
                                        combined,
                                    ))));
                                    iv.unit = None;
                                }
                            }
                        }
                        Ok(Expression::Interval(iv))
                    } else {
                        Ok(e)
                    }
                }
24860
24861 Action::ArrayConcatBracketConvert => {
24862 // Expression::Array/ArrayFunc -> target-specific
24863 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
24864 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
24865 match e {
24866 Expression::Array(arr) => {
24867 if matches!(target, DialectType::Redshift) {
24868 Ok(Expression::Function(Box::new(Function::new(
24869 "ARRAY".to_string(),
24870 arr.expressions,
24871 ))))
24872 } else {
24873 Ok(Expression::ArrayFunc(Box::new(
24874 crate::expressions::ArrayConstructor {
24875 expressions: arr.expressions,
24876 bracket_notation: false,
24877 use_list_keyword: false,
24878 },
24879 )))
24880 }
24881 }
24882 Expression::ArrayFunc(arr) => {
24883 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
24884 if matches!(target, DialectType::Redshift) {
24885 Ok(Expression::Function(Box::new(Function::new(
24886 "ARRAY".to_string(),
24887 arr.expressions,
24888 ))))
24889 } else {
24890 Ok(Expression::ArrayFunc(arr))
24891 }
24892 }
24893 _ => Ok(e),
24894 }
24895 }
24896
24897 Action::BitAggFloatCast => {
24898 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
24899 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24900 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24901 let int_type = DataType::Int {
24902 length: None,
24903 integer_spelling: false,
24904 };
24905 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
24906 if let Expression::Cast(c) = agg_this {
24907 match &c.to {
24908 DataType::Float { .. }
24909 | DataType::Double { .. }
24910 | DataType::Custom { .. } => {
24911 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24912 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
24913 let inner_type = match &c.to {
24914 DataType::Float {
24915 precision, scale, ..
24916 } => DataType::Float {
24917 precision: *precision,
24918 scale: *scale,
24919 real_spelling: true,
24920 },
24921 other => other.clone(),
24922 };
24923 let inner_cast =
24924 Expression::Cast(Box::new(crate::expressions::Cast {
24925 this: c.this.clone(),
24926 to: inner_type,
24927 trailing_comments: Vec::new(),
24928 double_colon_syntax: false,
24929 format: None,
24930 default: None,
24931 inferred_type: None,
24932 }));
24933 let rounded = Expression::Function(Box::new(Function::new(
24934 "ROUND".to_string(),
24935 vec![inner_cast],
24936 )));
24937 Expression::Cast(Box::new(crate::expressions::Cast {
24938 this: rounded,
24939 to: int_dt,
24940 trailing_comments: Vec::new(),
24941 double_colon_syntax: false,
24942 format: None,
24943 default: None,
24944 inferred_type: None,
24945 }))
24946 }
24947 DataType::Decimal { .. } => {
24948 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24949 Expression::Cast(Box::new(crate::expressions::Cast {
24950 this: Expression::Cast(c),
24951 to: int_dt,
24952 trailing_comments: Vec::new(),
24953 double_colon_syntax: false,
24954 format: None,
24955 default: None,
24956 inferred_type: None,
24957 }))
24958 }
24959 _ => Expression::Cast(c),
24960 }
24961 } else {
24962 agg_this
24963 }
24964 };
24965 match e {
24966 Expression::BitwiseOrAgg(mut f) => {
24967 f.this = wrap_agg(f.this, int_type);
24968 Ok(Expression::BitwiseOrAgg(f))
24969 }
24970 Expression::BitwiseAndAgg(mut f) => {
24971 let int_type = DataType::Int {
24972 length: None,
24973 integer_spelling: false,
24974 };
24975 f.this = wrap_agg(f.this, int_type);
24976 Ok(Expression::BitwiseAndAgg(f))
24977 }
24978 Expression::BitwiseXorAgg(mut f) => {
24979 let int_type = DataType::Int {
24980 length: None,
24981 integer_spelling: false,
24982 };
24983 f.this = wrap_agg(f.this, int_type);
24984 Ok(Expression::BitwiseXorAgg(f))
24985 }
24986 _ => Ok(e),
24987 }
24988 }
24989
24990 Action::BitAggSnowflakeRename => {
24991 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
24992 match e {
24993 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
24994 Function::new("BITORAGG".to_string(), vec![f.this]),
24995 ))),
24996 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
24997 Function::new("BITANDAGG".to_string(), vec![f.this]),
24998 ))),
24999 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
25000 Function::new("BITXORAGG".to_string(), vec![f.this]),
25001 ))),
25002 _ => Ok(e),
25003 }
25004 }
25005
25006 Action::StrftimeCastTimestamp => {
25007 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
25008 if let Expression::Cast(mut c) = e {
25009 if matches!(
25010 c.to,
25011 DataType::Timestamp {
25012 timezone: false,
25013 ..
25014 }
25015 ) {
25016 c.to = DataType::Custom {
25017 name: "TIMESTAMP_NTZ".to_string(),
25018 };
25019 }
25020 Ok(Expression::Cast(c))
25021 } else {
25022 Ok(e)
25023 }
25024 }
25025
25026 Action::DecimalDefaultPrecision => {
25027 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
25028 if let Expression::Cast(mut c) = e {
25029 if matches!(
25030 c.to,
25031 DataType::Decimal {
25032 precision: None,
25033 ..
25034 }
25035 ) {
25036 c.to = DataType::Decimal {
25037 precision: Some(18),
25038 scale: Some(3),
25039 };
25040 }
25041 Ok(Expression::Cast(c))
25042 } else {
25043 Ok(e)
25044 }
25045 }
25046
25047 Action::FilterToIff => {
25048 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
25049 if let Expression::Filter(f) = e {
25050 let condition = *f.expression;
25051 let agg = *f.this;
25052 // Strip WHERE from condition
25053 let cond = match condition {
25054 Expression::Where(w) => w.this,
25055 other => other,
25056 };
25057 // Extract the aggregate function and its argument
25058 // We want AVG(IFF(condition, x, NULL))
25059 match agg {
25060 Expression::Function(mut func) => {
25061 if !func.args.is_empty() {
25062 let orig_arg = func.args[0].clone();
25063 let iff_call = Expression::Function(Box::new(Function::new(
25064 "IFF".to_string(),
25065 vec![cond, orig_arg, Expression::Null(Null)],
25066 )));
25067 func.args[0] = iff_call;
25068 Ok(Expression::Function(func))
25069 } else {
25070 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
25071 this: Box::new(Expression::Function(func)),
25072 expression: Box::new(cond),
25073 })))
25074 }
25075 }
25076 Expression::Avg(mut avg) => {
25077 let iff_call = Expression::Function(Box::new(Function::new(
25078 "IFF".to_string(),
25079 vec![cond, avg.this.clone(), Expression::Null(Null)],
25080 )));
25081 avg.this = iff_call;
25082 Ok(Expression::Avg(avg))
25083 }
25084 Expression::Sum(mut s) => {
25085 let iff_call = Expression::Function(Box::new(Function::new(
25086 "IFF".to_string(),
25087 vec![cond, s.this.clone(), Expression::Null(Null)],
25088 )));
25089 s.this = iff_call;
25090 Ok(Expression::Sum(s))
25091 }
25092 Expression::Count(mut c) => {
25093 if let Some(ref this_expr) = c.this {
25094 let iff_call = Expression::Function(Box::new(Function::new(
25095 "IFF".to_string(),
25096 vec![cond, this_expr.clone(), Expression::Null(Null)],
25097 )));
25098 c.this = Some(iff_call);
25099 }
25100 Ok(Expression::Count(c))
25101 }
25102 other => {
25103 // Fallback: keep as Filter
25104 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
25105 this: Box::new(other),
25106 expression: Box::new(cond),
25107 })))
25108 }
25109 }
25110 } else {
25111 Ok(e)
25112 }
25113 }
25114
25115 Action::AggFilterToIff => {
25116 // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
25117 // Helper macro to handle the common AggFunc case
25118 macro_rules! handle_agg_filter_to_iff {
25119 ($variant:ident, $agg:expr) => {{
25120 let mut agg = $agg;
25121 if let Some(filter_cond) = agg.filter.take() {
25122 let iff_call = Expression::Function(Box::new(Function::new(
25123 "IFF".to_string(),
25124 vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
25125 )));
25126 agg.this = iff_call;
25127 }
25128 Ok(Expression::$variant(agg))
25129 }};
25130 }
25131
25132 match e {
25133 Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
25134 Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
25135 Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
25136 Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
25137 Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
25138 Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
25139 Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
25140 Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
25141 Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
25142 Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
25143 Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
25144 Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
25145 Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
25146 Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
25147 Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
25148 Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
25149 Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
25150 Expression::ApproxDistinct(agg) => {
25151 handle_agg_filter_to_iff!(ApproxDistinct, agg)
25152 }
25153 Expression::Count(mut c) => {
25154 if let Some(filter_cond) = c.filter.take() {
25155 if let Some(ref this_expr) = c.this {
25156 let iff_call = Expression::Function(Box::new(Function::new(
25157 "IFF".to_string(),
25158 vec![
25159 filter_cond,
25160 this_expr.clone(),
25161 Expression::Null(Null),
25162 ],
25163 )));
25164 c.this = Some(iff_call);
25165 }
25166 }
25167 Ok(Expression::Count(c))
25168 }
25169 other => Ok(other),
25170 }
25171 }
25172
25173 Action::JsonToGetPath => {
25174 // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
25175 if let Expression::JsonExtract(je) = e {
25176 // Convert to PARSE_JSON() wrapper:
25177 // - JSON(x) -> PARSE_JSON(x)
25178 // - PARSE_JSON(x) -> keep as-is
25179 // - anything else -> wrap in PARSE_JSON()
25180 let this = match &je.this {
25181 Expression::Function(f)
25182 if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
25183 {
25184 Expression::Function(Box::new(Function::new(
25185 "PARSE_JSON".to_string(),
25186 f.args.clone(),
25187 )))
25188 }
25189 Expression::Function(f)
25190 if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
25191 {
25192 je.this.clone()
25193 }
25194 // GET_PATH result is already JSON, don't wrap
25195 Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
25196 je.this.clone()
25197 }
25198 other => {
25199 // Wrap non-JSON expressions in PARSE_JSON()
25200 Expression::Function(Box::new(Function::new(
25201 "PARSE_JSON".to_string(),
25202 vec![other.clone()],
25203 )))
25204 }
25205 };
25206 // Convert path: extract key from JSONPath or strip $. prefix from string
25207 let path = match &je.path {
25208 Expression::JSONPath(jp) => {
25209 // Extract the key from JSONPath: $root.key -> 'key'
25210 let mut key_parts = Vec::new();
25211 for expr in &jp.expressions {
25212 match expr {
25213 Expression::JSONPathRoot(_) => {} // skip root
25214 Expression::JSONPathKey(k) => {
25215 if let Expression::Literal(lit) = &*k.this {
25216 if let Literal::String(s) = lit.as_ref() {
25217 key_parts.push(s.clone());
25218 }
25219 }
25220 }
25221 _ => {}
25222 }
25223 }
25224 if !key_parts.is_empty() {
25225 Expression::Literal(Box::new(Literal::String(
25226 key_parts.join("."),
25227 )))
25228 } else {
25229 je.path.clone()
25230 }
25231 }
25232 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
25233 {
25234 let Literal::String(s) = lit.as_ref() else {
25235 unreachable!()
25236 };
25237 let stripped = Self::strip_json_wildcards(&s[2..].to_string());
25238 Expression::Literal(Box::new(Literal::String(stripped)))
25239 }
25240 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
25241 {
25242 let Literal::String(s) = lit.as_ref() else {
25243 unreachable!()
25244 };
25245 let stripped = Self::strip_json_wildcards(&s[1..].to_string());
25246 Expression::Literal(Box::new(Literal::String(stripped)))
25247 }
25248 _ => je.path.clone(),
25249 };
25250 Ok(Expression::Function(Box::new(Function::new(
25251 "GET_PATH".to_string(),
25252 vec![this, path],
25253 ))))
25254 } else {
25255 Ok(e)
25256 }
25257 }
25258
25259 Action::StructToRow => {
25260 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
25261 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
25262
25263 // Extract key-value pairs from either Struct or MapFunc
25264 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
25265 Expression::Struct(s) => Some(
25266 s.fields
25267 .iter()
25268 .map(|(opt_name, field_expr)| {
25269 if let Some(name) = opt_name {
25270 (name.clone(), field_expr.clone())
25271 } else if let Expression::NamedArgument(na) = field_expr {
25272 (na.name.name.clone(), na.value.clone())
25273 } else {
25274 (String::new(), field_expr.clone())
25275 }
25276 })
25277 .collect(),
25278 ),
25279 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
25280 m.keys
25281 .iter()
25282 .zip(m.values.iter())
25283 .map(|(key, value)| {
25284 let key_name = match key {
25285 Expression::Literal(lit)
25286 if matches!(lit.as_ref(), Literal::String(_)) =>
25287 {
25288 let Literal::String(s) = lit.as_ref() else {
25289 unreachable!()
25290 };
25291 s.clone()
25292 }
25293 Expression::Identifier(id) => id.name.clone(),
25294 _ => String::new(),
25295 };
25296 (key_name, value.clone())
25297 })
25298 .collect(),
25299 ),
25300 _ => None,
25301 };
25302
25303 if let Some(pairs) = kv_pairs {
25304 let mut named_args = Vec::new();
25305 for (key_name, value) in pairs {
25306 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
25307 named_args.push(Expression::Alias(Box::new(
25308 crate::expressions::Alias::new(
25309 value,
25310 Identifier::new(key_name),
25311 ),
25312 )));
25313 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
25314 named_args.push(value);
25315 } else {
25316 named_args.push(value);
25317 }
25318 }
25319
25320 if matches!(target, DialectType::BigQuery) {
25321 Ok(Expression::Function(Box::new(Function::new(
25322 "STRUCT".to_string(),
25323 named_args,
25324 ))))
25325 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
25326 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
25327 let row_func = Expression::Function(Box::new(Function::new(
25328 "ROW".to_string(),
25329 named_args,
25330 )));
25331
25332 // Try to infer types for each pair
25333 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
25334 Expression::Struct(s) => Some(
25335 s.fields
25336 .iter()
25337 .map(|(opt_name, field_expr)| {
25338 if let Some(name) = opt_name {
25339 (name.clone(), field_expr.clone())
25340 } else if let Expression::NamedArgument(na) = field_expr
25341 {
25342 (na.name.name.clone(), na.value.clone())
25343 } else {
25344 (String::new(), field_expr.clone())
25345 }
25346 })
25347 .collect(),
25348 ),
25349 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
25350 m.keys
25351 .iter()
25352 .zip(m.values.iter())
25353 .map(|(key, value)| {
25354 let key_name = match key {
25355 Expression::Literal(lit)
25356 if matches!(
25357 lit.as_ref(),
25358 Literal::String(_)
25359 ) =>
25360 {
25361 let Literal::String(s) = lit.as_ref() else {
25362 unreachable!()
25363 };
25364 s.clone()
25365 }
25366 Expression::Identifier(id) => id.name.clone(),
25367 _ => String::new(),
25368 };
25369 (key_name, value.clone())
25370 })
25371 .collect(),
25372 ),
25373 _ => None,
25374 };
25375
25376 if let Some(pairs) = kv_pairs_again {
25377 // Infer types for all values
25378 let mut all_inferred = true;
25379 let mut fields = Vec::new();
25380 for (name, value) in &pairs {
25381 let inferred_type = match value {
25382 Expression::Literal(lit)
25383 if matches!(lit.as_ref(), Literal::Number(_)) =>
25384 {
25385 let Literal::Number(n) = lit.as_ref() else {
25386 unreachable!()
25387 };
25388 if n.contains('.') {
25389 Some(DataType::Double {
25390 precision: None,
25391 scale: None,
25392 })
25393 } else {
25394 Some(DataType::Int {
25395 length: None,
25396 integer_spelling: true,
25397 })
25398 }
25399 }
25400 Expression::Literal(lit)
25401 if matches!(lit.as_ref(), Literal::String(_)) =>
25402 {
25403 Some(DataType::VarChar {
25404 length: None,
25405 parenthesized_length: false,
25406 })
25407 }
25408 Expression::Boolean(_) => Some(DataType::Boolean),
25409 _ => None,
25410 };
25411 if let Some(dt) = inferred_type {
25412 fields.push(crate::expressions::StructField::new(
25413 name.clone(),
25414 dt,
25415 ));
25416 } else {
25417 all_inferred = false;
25418 break;
25419 }
25420 }
25421
25422 if all_inferred && !fields.is_empty() {
25423 let row_type = DataType::Struct {
25424 fields,
25425 nested: true,
25426 };
25427 Ok(Expression::Cast(Box::new(Cast {
25428 this: row_func,
25429 to: row_type,
25430 trailing_comments: Vec::new(),
25431 double_colon_syntax: false,
25432 format: None,
25433 default: None,
25434 inferred_type: None,
25435 })))
25436 } else {
25437 Ok(row_func)
25438 }
25439 } else {
25440 Ok(row_func)
25441 }
25442 } else {
25443 Ok(Expression::Function(Box::new(Function::new(
25444 "ROW".to_string(),
25445 named_args,
25446 ))))
25447 }
25448 } else {
25449 Ok(e)
25450 }
25451 }
25452
25453 Action::SparkStructConvert => {
25454 // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
25455 // or DuckDB {'name': val, ...}
25456 if let Expression::Function(f) = e {
25457 // Extract name-value pairs from aliased args
25458 let mut pairs: Vec<(String, Expression)> = Vec::new();
25459 for arg in &f.args {
25460 match arg {
25461 Expression::Alias(a) => {
25462 pairs.push((a.alias.name.clone(), a.this.clone()));
25463 }
25464 _ => {
25465 pairs.push((String::new(), arg.clone()));
25466 }
25467 }
25468 }
25469
25470 match target {
25471 DialectType::DuckDB => {
25472 // Convert to DuckDB struct literal {'name': value, ...}
25473 let mut keys = Vec::new();
25474 let mut values = Vec::new();
25475 for (name, value) in &pairs {
25476 keys.push(Expression::Literal(Box::new(Literal::String(
25477 name.clone(),
25478 ))));
25479 values.push(value.clone());
25480 }
25481 Ok(Expression::MapFunc(Box::new(
25482 crate::expressions::MapConstructor {
25483 keys,
25484 values,
25485 curly_brace_syntax: true,
25486 with_map_keyword: false,
25487 },
25488 )))
25489 }
25490 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25491 // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
25492 let row_args: Vec<Expression> =
25493 pairs.iter().map(|(_, v)| v.clone()).collect();
25494 let row_func = Expression::Function(Box::new(Function::new(
25495 "ROW".to_string(),
25496 row_args,
25497 )));
25498
25499 // Infer types
25500 let mut all_inferred = true;
25501 let mut fields = Vec::new();
25502 for (name, value) in &pairs {
25503 let inferred_type = match value {
25504 Expression::Literal(lit)
25505 if matches!(lit.as_ref(), Literal::Number(_)) =>
25506 {
25507 let Literal::Number(n) = lit.as_ref() else {
25508 unreachable!()
25509 };
25510 if n.contains('.') {
25511 Some(DataType::Double {
25512 precision: None,
25513 scale: None,
25514 })
25515 } else {
25516 Some(DataType::Int {
25517 length: None,
25518 integer_spelling: true,
25519 })
25520 }
25521 }
25522 Expression::Literal(lit)
25523 if matches!(lit.as_ref(), Literal::String(_)) =>
25524 {
25525 Some(DataType::VarChar {
25526 length: None,
25527 parenthesized_length: false,
25528 })
25529 }
25530 Expression::Boolean(_) => Some(DataType::Boolean),
25531 _ => None,
25532 };
25533 if let Some(dt) = inferred_type {
25534 fields.push(crate::expressions::StructField::new(
25535 name.clone(),
25536 dt,
25537 ));
25538 } else {
25539 all_inferred = false;
25540 break;
25541 }
25542 }
25543
25544 if all_inferred && !fields.is_empty() {
25545 let row_type = DataType::Struct {
25546 fields,
25547 nested: true,
25548 };
25549 Ok(Expression::Cast(Box::new(Cast {
25550 this: row_func,
25551 to: row_type,
25552 trailing_comments: Vec::new(),
25553 double_colon_syntax: false,
25554 format: None,
25555 default: None,
25556 inferred_type: None,
25557 })))
25558 } else {
25559 Ok(row_func)
25560 }
25561 }
25562 _ => Ok(Expression::Function(f)),
25563 }
25564 } else {
25565 Ok(e)
25566 }
25567 }
25568
25569 Action::ApproxCountDistinctToApproxDistinct => {
25570 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
25571 if let Expression::ApproxCountDistinct(f) = e {
25572 Ok(Expression::ApproxDistinct(f))
25573 } else {
25574 Ok(e)
25575 }
25576 }
25577
25578 Action::CollectListToArrayAgg => {
25579 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
25580 if let Expression::AggregateFunction(f) = e {
25581 let filter_expr = if !f.args.is_empty() {
25582 let arg = f.args[0].clone();
25583 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
25584 this: arg,
25585 not: true,
25586 postfix_form: false,
25587 })))
25588 } else {
25589 None
25590 };
25591 let agg = crate::expressions::AggFunc {
25592 this: if f.args.is_empty() {
25593 Expression::Null(crate::expressions::Null)
25594 } else {
25595 f.args[0].clone()
25596 },
25597 distinct: f.distinct,
25598 order_by: f.order_by.clone(),
25599 filter: filter_expr,
25600 ignore_nulls: None,
25601 name: None,
25602 having_max: None,
25603 limit: None,
25604 inferred_type: None,
25605 };
25606 Ok(Expression::ArrayAgg(Box::new(agg)))
25607 } else {
25608 Ok(e)
25609 }
25610 }
25611
25612 Action::CollectSetConvert => {
25613 // COLLECT_SET(x) -> target-specific
25614 if let Expression::AggregateFunction(f) = e {
25615 match target {
25616 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
25617 crate::expressions::AggregateFunction {
25618 name: "SET_AGG".to_string(),
25619 args: f.args,
25620 distinct: false,
25621 order_by: f.order_by,
25622 filter: f.filter,
25623 limit: f.limit,
25624 ignore_nulls: f.ignore_nulls,
25625 inferred_type: None,
25626 },
25627 ))),
25628 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
25629 crate::expressions::AggregateFunction {
25630 name: "ARRAY_UNIQUE_AGG".to_string(),
25631 args: f.args,
25632 distinct: false,
25633 order_by: f.order_by,
25634 filter: f.filter,
25635 limit: f.limit,
25636 ignore_nulls: f.ignore_nulls,
25637 inferred_type: None,
25638 },
25639 ))),
25640 DialectType::Trino | DialectType::DuckDB => {
25641 let agg = crate::expressions::AggFunc {
25642 this: if f.args.is_empty() {
25643 Expression::Null(crate::expressions::Null)
25644 } else {
25645 f.args[0].clone()
25646 },
25647 distinct: true,
25648 order_by: Vec::new(),
25649 filter: None,
25650 ignore_nulls: None,
25651 name: None,
25652 having_max: None,
25653 limit: None,
25654 inferred_type: None,
25655 };
25656 Ok(Expression::ArrayAgg(Box::new(agg)))
25657 }
25658 _ => Ok(Expression::AggregateFunction(f)),
25659 }
25660 } else {
25661 Ok(e)
25662 }
25663 }
25664
25665 Action::PercentileConvert => {
25666 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
25667 if let Expression::AggregateFunction(f) = e {
25668 let name = match target {
25669 DialectType::DuckDB => "QUANTILE",
25670 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
25671 _ => "PERCENTILE",
25672 };
25673 Ok(Expression::AggregateFunction(Box::new(
25674 crate::expressions::AggregateFunction {
25675 name: name.to_string(),
25676 args: f.args,
25677 distinct: f.distinct,
25678 order_by: f.order_by,
25679 filter: f.filter,
25680 limit: f.limit,
25681 ignore_nulls: f.ignore_nulls,
25682 inferred_type: None,
25683 },
25684 )))
25685 } else {
25686 Ok(e)
25687 }
25688 }
25689
25690 Action::CorrIsnanWrap => {
25691 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
25692 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
25693 let corr_clone = e.clone();
25694 let isnan = Expression::Function(Box::new(Function::new(
25695 "ISNAN".to_string(),
25696 vec![corr_clone.clone()],
25697 )));
25698 let case_expr = Expression::Case(Box::new(Case {
25699 operand: None,
25700 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
25701 else_: Some(corr_clone),
25702 comments: Vec::new(),
25703 inferred_type: None,
25704 }));
25705 Ok(case_expr)
25706 }
25707
25708 Action::TruncToDateTrunc => {
25709 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
25710 if let Expression::Function(f) = e {
25711 if f.args.len() == 2 {
25712 let timestamp = f.args[0].clone();
25713 let unit_expr = f.args[1].clone();
25714
25715 if matches!(target, DialectType::ClickHouse) {
25716 // For ClickHouse, produce Expression::DateTrunc which the generator
25717 // outputs as DATE_TRUNC(...) without going through the ClickHouse
25718 // target transform that would convert it to dateTrunc
25719 let unit_str = Self::get_unit_str_static(&unit_expr);
25720 let dt_field = match unit_str.as_str() {
25721 "YEAR" => DateTimeField::Year,
25722 "MONTH" => DateTimeField::Month,
25723 "DAY" => DateTimeField::Day,
25724 "HOUR" => DateTimeField::Hour,
25725 "MINUTE" => DateTimeField::Minute,
25726 "SECOND" => DateTimeField::Second,
25727 "WEEK" => DateTimeField::Week,
25728 "QUARTER" => DateTimeField::Quarter,
25729 _ => DateTimeField::Custom(unit_str),
25730 };
25731 Ok(Expression::DateTrunc(Box::new(
25732 crate::expressions::DateTruncFunc {
25733 this: timestamp,
25734 unit: dt_field,
25735 },
25736 )))
25737 } else {
25738 let new_args = vec![unit_expr, timestamp];
25739 Ok(Expression::Function(Box::new(Function::new(
25740 "DATE_TRUNC".to_string(),
25741 new_args,
25742 ))))
25743 }
25744 } else {
25745 Ok(Expression::Function(f))
25746 }
25747 } else {
25748 Ok(e)
25749 }
25750 }
25751
25752 Action::ArrayContainsConvert => {
25753 if let Expression::ArrayContains(f) = e {
25754 match target {
25755 DialectType::Presto | DialectType::Trino => {
25756 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
25757 Ok(Expression::Function(Box::new(Function::new(
25758 "CONTAINS".to_string(),
25759 vec![f.this, f.expression],
25760 ))))
25761 }
25762 DialectType::Snowflake => {
25763 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
25764 let cast_val =
25765 Expression::Cast(Box::new(crate::expressions::Cast {
25766 this: f.expression,
25767 to: crate::expressions::DataType::Custom {
25768 name: "VARIANT".to_string(),
25769 },
25770 trailing_comments: Vec::new(),
25771 double_colon_syntax: false,
25772 format: None,
25773 default: None,
25774 inferred_type: None,
25775 }));
25776 Ok(Expression::Function(Box::new(Function::new(
25777 "ARRAY_CONTAINS".to_string(),
25778 vec![cast_val, f.this],
25779 ))))
25780 }
25781 _ => Ok(Expression::ArrayContains(f)),
25782 }
25783 } else {
25784 Ok(e)
25785 }
25786 }
25787
25788 Action::ArrayExceptConvert => {
25789 if let Expression::ArrayExcept(f) = e {
25790 let source_arr = f.this;
25791 let exclude_arr = f.expression;
25792 match target {
25793 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
25794 // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
25795 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
25796 // ELSE LIST_TRANSFORM(LIST_FILTER(
25797 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
25798 // pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
25799 // > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
25800 // pair -> pair[1])
25801 // END
25802
25803 // Build null check
25804 let source_is_null =
25805 Expression::IsNull(Box::new(crate::expressions::IsNull {
25806 this: source_arr.clone(),
25807 not: false,
25808 postfix_form: false,
25809 }));
25810 let exclude_is_null =
25811 Expression::IsNull(Box::new(crate::expressions::IsNull {
25812 this: exclude_arr.clone(),
25813 not: false,
25814 postfix_form: false,
25815 }));
25816 let null_check =
25817 Expression::Or(Box::new(crate::expressions::BinaryOp {
25818 left: source_is_null,
25819 right: exclude_is_null,
25820 left_comments: vec![],
25821 operator_comments: vec![],
25822 trailing_comments: vec![],
25823 inferred_type: None,
25824 }));
25825
25826 // GENERATE_SERIES(1, LENGTH(source))
25827 let gen_series = Expression::Function(Box::new(Function::new(
25828 "GENERATE_SERIES".to_string(),
25829 vec![
25830 Expression::number(1),
25831 Expression::Function(Box::new(Function::new(
25832 "LENGTH".to_string(),
25833 vec![source_arr.clone()],
25834 ))),
25835 ],
25836 )));
25837
25838 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
25839 let list_zip = Expression::Function(Box::new(Function::new(
25840 "LIST_ZIP".to_string(),
25841 vec![source_arr.clone(), gen_series],
25842 )));
25843
25844 // pair[1] and pair[2]
25845 let pair_col = Expression::column("pair");
25846 let pair_1 = Expression::Subscript(Box::new(
25847 crate::expressions::Subscript {
25848 this: pair_col.clone(),
25849 index: Expression::number(1),
25850 },
25851 ));
25852 let pair_2 = Expression::Subscript(Box::new(
25853 crate::expressions::Subscript {
25854 this: pair_col.clone(),
25855 index: Expression::number(2),
25856 },
25857 ));
25858
25859 // source[1:pair[2]]
25860 let source_slice = Expression::ArraySlice(Box::new(
25861 crate::expressions::ArraySlice {
25862 this: source_arr.clone(),
25863 start: Some(Expression::number(1)),
25864 end: Some(pair_2),
25865 },
25866 ));
25867
25868 let e_col = Expression::column("e");
25869
25870 // e -> e IS NOT DISTINCT FROM pair[1]
25871 let inner_lambda1 =
25872 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25873 parameters: vec![crate::expressions::Identifier::new("e")],
25874 body: Expression::NullSafeEq(Box::new(
25875 crate::expressions::BinaryOp {
25876 left: e_col.clone(),
25877 right: pair_1.clone(),
25878 left_comments: vec![],
25879 operator_comments: vec![],
25880 trailing_comments: vec![],
25881 inferred_type: None,
25882 },
25883 )),
25884 colon: false,
25885 parameter_types: vec![],
25886 }));
25887
25888 // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
25889 let inner_filter1 = Expression::Function(Box::new(Function::new(
25890 "LIST_FILTER".to_string(),
25891 vec![source_slice, inner_lambda1],
25892 )));
25893
25894 // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
25895 let len1 = Expression::Function(Box::new(Function::new(
25896 "LENGTH".to_string(),
25897 vec![inner_filter1],
25898 )));
25899
25900 // e -> e IS NOT DISTINCT FROM pair[1]
25901 let inner_lambda2 =
25902 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25903 parameters: vec![crate::expressions::Identifier::new("e")],
25904 body: Expression::NullSafeEq(Box::new(
25905 crate::expressions::BinaryOp {
25906 left: e_col,
25907 right: pair_1.clone(),
25908 left_comments: vec![],
25909 operator_comments: vec![],
25910 trailing_comments: vec![],
25911 inferred_type: None,
25912 },
25913 )),
25914 colon: false,
25915 parameter_types: vec![],
25916 }));
25917
25918 // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
25919 let inner_filter2 = Expression::Function(Box::new(Function::new(
25920 "LIST_FILTER".to_string(),
25921 vec![exclude_arr.clone(), inner_lambda2],
25922 )));
25923
25924 // LENGTH(LIST_FILTER(exclude, ...))
25925 let len2 = Expression::Function(Box::new(Function::new(
25926 "LENGTH".to_string(),
25927 vec![inner_filter2],
25928 )));
25929
25930 // (LENGTH(...) > LENGTH(...))
25931 let cond = Expression::Paren(Box::new(Paren {
25932 this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
25933 left: len1,
25934 right: len2,
25935 left_comments: vec![],
25936 operator_comments: vec![],
25937 trailing_comments: vec![],
25938 inferred_type: None,
25939 })),
25940 trailing_comments: vec![],
25941 }));
25942
25943 // pair -> (condition)
25944 let filter_lambda =
25945 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25946 parameters: vec![crate::expressions::Identifier::new(
25947 "pair",
25948 )],
25949 body: cond,
25950 colon: false,
25951 parameter_types: vec![],
25952 }));
25953
25954 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
25955 let outer_filter = Expression::Function(Box::new(Function::new(
25956 "LIST_FILTER".to_string(),
25957 vec![list_zip, filter_lambda],
25958 )));
25959
25960 // pair -> pair[1]
25961 let transform_lambda =
25962 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25963 parameters: vec![crate::expressions::Identifier::new(
25964 "pair",
25965 )],
25966 body: pair_1,
25967 colon: false,
25968 parameter_types: vec![],
25969 }));
25970
25971 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
25972 let list_transform = Expression::Function(Box::new(Function::new(
25973 "LIST_TRANSFORM".to_string(),
25974 vec![outer_filter, transform_lambda],
25975 )));
25976
25977 Ok(Expression::Case(Box::new(Case {
25978 operand: None,
25979 whens: vec![(null_check, Expression::Null(Null))],
25980 else_: Some(list_transform),
25981 comments: Vec::new(),
25982 inferred_type: None,
25983 })))
25984 }
25985 DialectType::DuckDB => {
25986 // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
25987 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
25988 // ELSE LIST_FILTER(LIST_DISTINCT(source),
25989 // e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
25990 // END
25991
25992 // Build: source IS NULL
25993 let source_is_null =
25994 Expression::IsNull(Box::new(crate::expressions::IsNull {
25995 this: source_arr.clone(),
25996 not: false,
25997 postfix_form: false,
25998 }));
25999 // Build: exclude IS NULL
26000 let exclude_is_null =
26001 Expression::IsNull(Box::new(crate::expressions::IsNull {
26002 this: exclude_arr.clone(),
26003 not: false,
26004 postfix_form: false,
26005 }));
26006 // source IS NULL OR exclude IS NULL
26007 let null_check =
26008 Expression::Or(Box::new(crate::expressions::BinaryOp {
26009 left: source_is_null,
26010 right: exclude_is_null,
26011 left_comments: vec![],
26012 operator_comments: vec![],
26013 trailing_comments: vec![],
26014 inferred_type: None,
26015 }));
26016
26017 // LIST_DISTINCT(source)
26018 let list_distinct = Expression::Function(Box::new(Function::new(
26019 "LIST_DISTINCT".to_string(),
26020 vec![source_arr.clone()],
26021 )));
26022
26023 // x IS NOT DISTINCT FROM e
26024 let x_col = Expression::column("x");
26025 let e_col = Expression::column("e");
26026 let is_not_distinct = Expression::NullSafeEq(Box::new(
26027 crate::expressions::BinaryOp {
26028 left: x_col,
26029 right: e_col.clone(),
26030 left_comments: vec![],
26031 operator_comments: vec![],
26032 trailing_comments: vec![],
26033 inferred_type: None,
26034 },
26035 ));
26036
26037 // x -> x IS NOT DISTINCT FROM e
26038 let inner_lambda =
26039 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26040 parameters: vec![crate::expressions::Identifier::new("x")],
26041 body: is_not_distinct,
26042 colon: false,
26043 parameter_types: vec![],
26044 }));
26045
26046 // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
26047 let inner_list_filter =
26048 Expression::Function(Box::new(Function::new(
26049 "LIST_FILTER".to_string(),
26050 vec![exclude_arr.clone(), inner_lambda],
26051 )));
26052
26053 // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
26054 let len_inner = Expression::Function(Box::new(Function::new(
26055 "LENGTH".to_string(),
26056 vec![inner_list_filter],
26057 )));
26058
26059 // LENGTH(...) = 0
26060 let eq_zero =
26061 Expression::Eq(Box::new(crate::expressions::BinaryOp {
26062 left: len_inner,
26063 right: Expression::number(0),
26064 left_comments: vec![],
26065 operator_comments: vec![],
26066 trailing_comments: vec![],
26067 inferred_type: None,
26068 }));
26069
26070 // e -> LENGTH(LIST_FILTER(...)) = 0
26071 let outer_lambda =
26072 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26073 parameters: vec![crate::expressions::Identifier::new("e")],
26074 body: eq_zero,
26075 colon: false,
26076 parameter_types: vec![],
26077 }));
26078
26079 // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
26080 let outer_list_filter =
26081 Expression::Function(Box::new(Function::new(
26082 "LIST_FILTER".to_string(),
26083 vec![list_distinct, outer_lambda],
26084 )));
26085
26086 // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
26087 Ok(Expression::Case(Box::new(Case {
26088 operand: None,
26089 whens: vec![(null_check, Expression::Null(Null))],
26090 else_: Some(outer_list_filter),
26091 comments: Vec::new(),
26092 inferred_type: None,
26093 })))
26094 }
26095 DialectType::Snowflake => {
26096 // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
26097 Ok(Expression::ArrayExcept(Box::new(
26098 crate::expressions::BinaryFunc {
26099 this: source_arr,
26100 expression: exclude_arr,
26101 original_name: None,
26102 inferred_type: None,
26103 },
26104 )))
26105 }
26106 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26107 // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
26108 Ok(Expression::Function(Box::new(Function::new(
26109 "ARRAY_EXCEPT".to_string(),
26110 vec![source_arr, exclude_arr],
26111 ))))
26112 }
26113 _ => Ok(Expression::ArrayExcept(Box::new(
26114 crate::expressions::BinaryFunc {
26115 this: source_arr,
26116 expression: exclude_arr,
26117 original_name: None,
26118 inferred_type: None,
26119 },
26120 ))),
26121 }
26122 } else {
26123 Ok(e)
26124 }
26125 }
26126
26127 Action::RegexpLikeExasolAnchor => {
26128 // RegexpLike -> Exasol: wrap pattern with .*...*
26129 // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
26130 // dialects does partial match, so we need to anchor with .* on both sides
26131 if let Expression::RegexpLike(mut f) = e {
26132 match &f.pattern {
26133 Expression::Literal(lit)
26134 if matches!(lit.as_ref(), Literal::String(_)) =>
26135 {
26136 let Literal::String(s) = lit.as_ref() else {
26137 unreachable!()
26138 };
26139 // String literal: wrap with .*...*
26140 f.pattern = Expression::Literal(Box::new(Literal::String(
26141 format!(".*{}.*", s),
26142 )));
26143 }
26144 _ => {
26145 // Non-literal: wrap with CONCAT('.*', pattern, '.*')
26146 f.pattern =
26147 Expression::Paren(Box::new(crate::expressions::Paren {
26148 this: Expression::Concat(Box::new(
26149 crate::expressions::BinaryOp {
26150 left: Expression::Concat(Box::new(
26151 crate::expressions::BinaryOp {
26152 left: Expression::Literal(Box::new(
26153 Literal::String(".*".to_string()),
26154 )),
26155 right: f.pattern,
26156 left_comments: vec![],
26157 operator_comments: vec![],
26158 trailing_comments: vec![],
26159 inferred_type: None,
26160 },
26161 )),
26162 right: Expression::Literal(Box::new(
26163 Literal::String(".*".to_string()),
26164 )),
26165 left_comments: vec![],
26166 operator_comments: vec![],
26167 trailing_comments: vec![],
26168 inferred_type: None,
26169 },
26170 )),
26171 trailing_comments: vec![],
26172 }));
26173 }
26174 }
26175 Ok(Expression::RegexpLike(f))
26176 } else {
26177 Ok(e)
26178 }
26179 }
26180
26181 Action::ArrayPositionSnowflakeSwap => {
26182 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
26183 if let Expression::ArrayPosition(f) = e {
26184 Ok(Expression::ArrayPosition(Box::new(
26185 crate::expressions::BinaryFunc {
26186 this: f.expression,
26187 expression: f.this,
26188 original_name: f.original_name,
26189 inferred_type: f.inferred_type,
26190 },
26191 )))
26192 } else {
26193 Ok(e)
26194 }
26195 }
26196
26197 Action::SnowflakeArrayPositionToDuckDB => {
26198 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
26199 // Snowflake uses 0-based indexing, DuckDB uses 1-based
26200 // The parser has this=value, expression=array (Snowflake order)
26201 if let Expression::ArrayPosition(f) = e {
26202 // Create ARRAY_POSITION(array, value) in standard order
26203 let standard_pos =
26204 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
26205 this: f.expression, // array
26206 expression: f.this, // value
26207 original_name: f.original_name,
26208 inferred_type: f.inferred_type,
26209 }));
26210 // Subtract 1 for zero-based indexing
26211 Ok(Expression::Sub(Box::new(BinaryOp {
26212 left: standard_pos,
26213 right: Expression::number(1),
26214 left_comments: vec![],
26215 operator_comments: vec![],
26216 trailing_comments: vec![],
26217 inferred_type: None,
26218 })))
26219 } else {
26220 Ok(e)
26221 }
26222 }
26223
26224 Action::ArrayDistinctConvert => {
26225 // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
26226 // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
26227 // THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
26228 // ELSE LIST_DISTINCT(arr)
26229 // END
26230 if let Expression::ArrayDistinct(f) = e {
26231 let arr = f.this;
26232
26233 // ARRAY_LENGTH(arr)
26234 let array_length = Expression::Function(Box::new(Function::new(
26235 "ARRAY_LENGTH".to_string(),
26236 vec![arr.clone()],
26237 )));
26238 // LIST_COUNT(arr)
26239 let list_count = Expression::Function(Box::new(Function::new(
26240 "LIST_COUNT".to_string(),
26241 vec![arr.clone()],
26242 )));
26243 // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
26244 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
26245 left: array_length,
26246 right: list_count,
26247 left_comments: vec![],
26248 operator_comments: vec![],
26249 trailing_comments: vec![],
26250 inferred_type: None,
26251 }));
26252
26253 // _u column
26254 let u_col = Expression::column("_u");
26255 // NOT _u IS NULL
26256 let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
26257 this: u_col.clone(),
26258 not: false,
26259 postfix_form: false,
26260 }));
26261 let not_u_is_null =
26262 Expression::Not(Box::new(crate::expressions::UnaryOp {
26263 this: u_is_null,
26264 inferred_type: None,
26265 }));
26266 // _u -> NOT _u IS NULL
26267 let filter_lambda =
26268 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26269 parameters: vec![crate::expressions::Identifier::new("_u")],
26270 body: not_u_is_null,
26271 colon: false,
26272 parameter_types: vec![],
26273 }));
26274 // LIST_FILTER(arr, _u -> NOT _u IS NULL)
26275 let list_filter = Expression::Function(Box::new(Function::new(
26276 "LIST_FILTER".to_string(),
26277 vec![arr.clone(), filter_lambda],
26278 )));
26279 // LIST_DISTINCT(LIST_FILTER(arr, ...))
26280 let list_distinct_filtered = Expression::Function(Box::new(Function::new(
26281 "LIST_DISTINCT".to_string(),
26282 vec![list_filter],
26283 )));
26284 // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
26285 let list_append = Expression::Function(Box::new(Function::new(
26286 "LIST_APPEND".to_string(),
26287 vec![list_distinct_filtered, Expression::Null(Null)],
26288 )));
26289
26290 // LIST_DISTINCT(arr)
26291 let list_distinct = Expression::Function(Box::new(Function::new(
26292 "LIST_DISTINCT".to_string(),
26293 vec![arr],
26294 )));
26295
26296 // CASE WHEN neq THEN list_append ELSE list_distinct END
26297 Ok(Expression::Case(Box::new(Case {
26298 operand: None,
26299 whens: vec![(neq, list_append)],
26300 else_: Some(list_distinct),
26301 comments: Vec::new(),
26302 inferred_type: None,
26303 })))
26304 } else {
26305 Ok(e)
26306 }
26307 }
26308
26309 Action::ArrayDistinctClickHouse => {
26310 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
26311 if let Expression::ArrayDistinct(f) = e {
26312 Ok(Expression::Function(Box::new(Function::new(
26313 "arrayDistinct".to_string(),
26314 vec![f.this],
26315 ))))
26316 } else {
26317 Ok(e)
26318 }
26319 }
26320
26321 Action::ArrayContainsDuckDBConvert => {
26322 // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
26323 // CASE WHEN value IS NULL
26324 // THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
26325 // ELSE ARRAY_CONTAINS(array, value)
26326 // END
26327 // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
26328 if let Expression::ArrayContains(f) = e {
26329 let value = f.this;
26330 let array = f.expression;
26331
26332 // value IS NULL
26333 let value_is_null =
26334 Expression::IsNull(Box::new(crate::expressions::IsNull {
26335 this: value.clone(),
26336 not: false,
26337 postfix_form: false,
26338 }));
26339
26340 // ARRAY_LENGTH(array)
26341 let array_length = Expression::Function(Box::new(Function::new(
26342 "ARRAY_LENGTH".to_string(),
26343 vec![array.clone()],
26344 )));
26345 // LIST_COUNT(array)
26346 let list_count = Expression::Function(Box::new(Function::new(
26347 "LIST_COUNT".to_string(),
26348 vec![array.clone()],
26349 )));
26350 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
26351 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
26352 left: array_length,
26353 right: list_count,
26354 left_comments: vec![],
26355 operator_comments: vec![],
26356 trailing_comments: vec![],
26357 inferred_type: None,
26358 }));
26359 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
26360 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
26361 this: Box::new(neq),
26362 expression: Box::new(Expression::Boolean(
26363 crate::expressions::BooleanLiteral { value: false },
26364 )),
26365 }));
26366
26367 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
26368 let array_contains = Expression::Function(Box::new(Function::new(
26369 "ARRAY_CONTAINS".to_string(),
26370 vec![array, value],
26371 )));
26372
26373 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
26374 Ok(Expression::Case(Box::new(Case {
26375 operand: None,
26376 whens: vec![(value_is_null, nullif)],
26377 else_: Some(array_contains),
26378 comments: Vec::new(),
26379 inferred_type: None,
26380 })))
26381 } else {
26382 Ok(e)
26383 }
26384 }
26385
26386 Action::StrPositionExpand => {
26387 // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
26388 // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
26389 // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
26390 if let Expression::StrPosition(sp) = e {
26391 let crate::expressions::StrPosition {
26392 this,
26393 substr,
26394 position,
26395 occurrence,
26396 } = *sp;
26397 let string = *this;
26398 let substr_expr = match substr {
26399 Some(s) => *s,
26400 None => Expression::Null(Null),
26401 };
26402 let pos = match position {
26403 Some(p) => *p,
26404 None => Expression::number(1),
26405 };
26406
26407 // SUBSTRING(string, pos)
26408 let substring_call = Expression::Function(Box::new(Function::new(
26409 "SUBSTRING".to_string(),
26410 vec![string.clone(), pos.clone()],
26411 )));
26412 // STRPOS(SUBSTRING(string, pos), substr)
26413 let strpos_call = Expression::Function(Box::new(Function::new(
26414 "STRPOS".to_string(),
26415 vec![substring_call, substr_expr.clone()],
26416 )));
26417 // STRPOS(...) + pos - 1
26418 let pos_adjusted =
26419 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
26420 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
26421 strpos_call.clone(),
26422 pos.clone(),
26423 ))),
26424 Expression::number(1),
26425 )));
26426 // STRPOS(...) = 0
26427 let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
26428 strpos_call.clone(),
26429 Expression::number(0),
26430 )));
26431
26432 match target {
26433 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26434 // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
26435 Ok(Expression::Function(Box::new(Function::new(
26436 "IF".to_string(),
26437 vec![is_zero, Expression::number(0), pos_adjusted],
26438 ))))
26439 }
26440 DialectType::DuckDB => {
26441 // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
26442 Ok(Expression::Case(Box::new(Case {
26443 operand: None,
26444 whens: vec![(is_zero, Expression::number(0))],
26445 else_: Some(pos_adjusted),
26446 comments: Vec::new(),
26447 inferred_type: None,
26448 })))
26449 }
26450 _ => {
26451 // Reconstruct StrPosition
26452 Ok(Expression::StrPosition(Box::new(
26453 crate::expressions::StrPosition {
26454 this: Box::new(string),
26455 substr: Some(Box::new(substr_expr)),
26456 position: Some(Box::new(pos)),
26457 occurrence,
26458 },
26459 )))
26460 }
26461 }
26462 } else {
26463 Ok(e)
26464 }
26465 }
26466
26467 Action::MonthsBetweenConvert => {
26468 if let Expression::MonthsBetween(mb) = e {
26469 let crate::expressions::BinaryFunc {
26470 this: end_date,
26471 expression: start_date,
26472 ..
26473 } = *mb;
26474 match target {
26475 DialectType::DuckDB => {
26476 let cast_end = Self::ensure_cast_date(end_date);
26477 let cast_start = Self::ensure_cast_date(start_date);
26478 let dd = Expression::Function(Box::new(Function::new(
26479 "DATE_DIFF".to_string(),
26480 vec![
26481 Expression::string("MONTH"),
26482 cast_start.clone(),
26483 cast_end.clone(),
26484 ],
26485 )));
26486 let day_end = Expression::Function(Box::new(Function::new(
26487 "DAY".to_string(),
26488 vec![cast_end.clone()],
26489 )));
26490 let day_start = Expression::Function(Box::new(Function::new(
26491 "DAY".to_string(),
26492 vec![cast_start.clone()],
26493 )));
26494 let last_day_end = Expression::Function(Box::new(Function::new(
26495 "LAST_DAY".to_string(),
26496 vec![cast_end.clone()],
26497 )));
26498 let last_day_start = Expression::Function(Box::new(Function::new(
26499 "LAST_DAY".to_string(),
26500 vec![cast_start.clone()],
26501 )));
26502 let day_last_end = Expression::Function(Box::new(Function::new(
26503 "DAY".to_string(),
26504 vec![last_day_end],
26505 )));
26506 let day_last_start = Expression::Function(Box::new(Function::new(
26507 "DAY".to_string(),
26508 vec![last_day_start],
26509 )));
26510 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
26511 day_end.clone(),
26512 day_last_end,
26513 )));
26514 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
26515 day_start.clone(),
26516 day_last_start,
26517 )));
26518 let both_cond =
26519 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
26520 let day_diff =
26521 Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
26522 let day_diff_paren =
26523 Expression::Paren(Box::new(crate::expressions::Paren {
26524 this: day_diff,
26525 trailing_comments: Vec::new(),
26526 }));
26527 let frac = Expression::Div(Box::new(BinaryOp::new(
26528 day_diff_paren,
26529 Expression::Literal(Box::new(Literal::Number(
26530 "31.0".to_string(),
26531 ))),
26532 )));
26533 let case_expr = Expression::Case(Box::new(Case {
26534 operand: None,
26535 whens: vec![(both_cond, Expression::number(0))],
26536 else_: Some(frac),
26537 comments: Vec::new(),
26538 inferred_type: None,
26539 }));
26540 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
26541 }
26542 DialectType::Snowflake | DialectType::Redshift => {
26543 let unit = Expression::Identifier(Identifier::new("MONTH"));
26544 Ok(Expression::Function(Box::new(Function::new(
26545 "DATEDIFF".to_string(),
26546 vec![unit, start_date, end_date],
26547 ))))
26548 }
26549 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26550 Ok(Expression::Function(Box::new(Function::new(
26551 "DATE_DIFF".to_string(),
26552 vec![Expression::string("MONTH"), start_date, end_date],
26553 ))))
26554 }
26555 _ => Ok(Expression::MonthsBetween(Box::new(
26556 crate::expressions::BinaryFunc {
26557 this: end_date,
26558 expression: start_date,
26559 original_name: None,
26560 inferred_type: None,
26561 },
26562 ))),
26563 }
26564 } else {
26565 Ok(e)
26566 }
26567 }
26568
26569 Action::AddMonthsConvert => {
26570 if let Expression::AddMonths(am) = e {
26571 let date = am.this;
26572 let val = am.expression;
26573 match target {
26574 DialectType::TSQL | DialectType::Fabric => {
26575 let cast_date = Self::ensure_cast_datetime2(date);
26576 Ok(Expression::Function(Box::new(Function::new(
26577 "DATEADD".to_string(),
26578 vec![
26579 Expression::Identifier(Identifier::new("MONTH")),
26580 val,
26581 cast_date,
26582 ],
26583 ))))
26584 }
26585 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
26586 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
26587 // Optionally wrapped in CAST(... AS type) if the input had a specific type
26588
26589 // Determine the cast type from the date expression
26590 let (cast_date, return_type) = match &date {
26591 Expression::Literal(lit)
26592 if matches!(lit.as_ref(), Literal::String(_)) =>
26593 {
26594 // String literal: CAST(str AS TIMESTAMP), no outer CAST
26595 (
26596 Expression::Cast(Box::new(Cast {
26597 this: date.clone(),
26598 to: DataType::Timestamp {
26599 precision: None,
26600 timezone: false,
26601 },
26602 trailing_comments: Vec::new(),
26603 double_colon_syntax: false,
26604 format: None,
26605 default: None,
26606 inferred_type: None,
26607 })),
26608 None,
26609 )
26610 }
26611 Expression::Cast(c) => {
26612 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
26613 (date.clone(), Some(c.to.clone()))
26614 }
26615 _ => {
26616 // Expression or NULL::TYPE - keep as-is, check for cast type
26617 if let Expression::Cast(c) = &date {
26618 (date.clone(), Some(c.to.clone()))
26619 } else {
26620 (date.clone(), None)
26621 }
26622 }
26623 };
26624
26625 // Build the interval expression
26626 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
26627 // For integer values, use INTERVAL val MONTH
26628 let is_non_integer_val = match &val {
26629 Expression::Literal(lit)
26630 if matches!(lit.as_ref(), Literal::Number(_)) =>
26631 {
26632 let Literal::Number(n) = lit.as_ref() else {
26633 unreachable!()
26634 };
26635 n.contains('.')
26636 }
26637 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
26638 Expression::Neg(n) => {
26639 if let Expression::Literal(lit) = &n.this {
26640 if let Literal::Number(s) = lit.as_ref() {
26641 s.contains('.')
26642 } else {
26643 false
26644 }
26645 } else {
26646 false
26647 }
26648 }
26649 _ => false,
26650 };
26651
26652 let add_interval = if is_non_integer_val {
26653 // TO_MONTHS(CAST(ROUND(val) AS INT))
26654 let round_val = Expression::Function(Box::new(Function::new(
26655 "ROUND".to_string(),
26656 vec![val.clone()],
26657 )));
26658 let cast_int = Expression::Cast(Box::new(Cast {
26659 this: round_val,
26660 to: DataType::Int {
26661 length: None,
26662 integer_spelling: false,
26663 },
26664 trailing_comments: Vec::new(),
26665 double_colon_syntax: false,
26666 format: None,
26667 default: None,
26668 inferred_type: None,
26669 }));
26670 Expression::Function(Box::new(Function::new(
26671 "TO_MONTHS".to_string(),
26672 vec![cast_int],
26673 )))
26674 } else {
26675 // INTERVAL val MONTH
26676 // For negative numbers, wrap in parens
26677 let interval_val = match &val {
26678 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
26679 {
26680 let Literal::Number(_) = lit.as_ref() else {
26681 unreachable!()
26682 };
26683 Expression::Paren(Box::new(Paren {
26684 this: val.clone(),
26685 trailing_comments: Vec::new(),
26686 }))
26687 }
26688 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
26689 this: val.clone(),
26690 trailing_comments: Vec::new(),
26691 })),
26692 Expression::Null(_) => Expression::Paren(Box::new(Paren {
26693 this: val.clone(),
26694 trailing_comments: Vec::new(),
26695 })),
26696 _ => val.clone(),
26697 };
26698 Expression::Interval(Box::new(crate::expressions::Interval {
26699 this: Some(interval_val),
26700 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26701 unit: crate::expressions::IntervalUnit::Month,
26702 use_plural: false,
26703 }),
26704 }))
26705 };
26706
26707 // Build: date + interval
26708 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
26709 cast_date.clone(),
26710 add_interval.clone(),
26711 )));
26712
26713 // Build LAST_DAY(date)
26714 let last_day_date = Expression::Function(Box::new(Function::new(
26715 "LAST_DAY".to_string(),
26716 vec![cast_date.clone()],
26717 )));
26718
26719 // Build LAST_DAY(date + interval)
26720 let last_day_date_plus =
26721 Expression::Function(Box::new(Function::new(
26722 "LAST_DAY".to_string(),
26723 vec![date_plus_interval.clone()],
26724 )));
26725
26726 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
26727 let case_expr = Expression::Case(Box::new(Case {
26728 operand: None,
26729 whens: vec![(
26730 Expression::Eq(Box::new(BinaryOp::new(
26731 last_day_date,
26732 cast_date.clone(),
26733 ))),
26734 last_day_date_plus,
26735 )],
26736 else_: Some(date_plus_interval),
26737 comments: Vec::new(),
26738 inferred_type: None,
26739 }));
26740
26741 // Wrap in CAST(... AS type) if needed
26742 if let Some(dt) = return_type {
26743 Ok(Expression::Cast(Box::new(Cast {
26744 this: case_expr,
26745 to: dt,
26746 trailing_comments: Vec::new(),
26747 double_colon_syntax: false,
26748 format: None,
26749 default: None,
26750 inferred_type: None,
26751 })))
26752 } else {
26753 Ok(case_expr)
26754 }
26755 }
26756 DialectType::DuckDB => {
26757 // Non-Snowflake source: simple date + INTERVAL
26758 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26759 {
26760 Expression::Cast(Box::new(Cast {
26761 this: date,
26762 to: DataType::Timestamp {
26763 precision: None,
26764 timezone: false,
26765 },
26766 trailing_comments: Vec::new(),
26767 double_colon_syntax: false,
26768 format: None,
26769 default: None,
26770 inferred_type: None,
26771 }))
26772 } else {
26773 date
26774 };
26775 let interval =
26776 Expression::Interval(Box::new(crate::expressions::Interval {
26777 this: Some(val),
26778 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26779 unit: crate::expressions::IntervalUnit::Month,
26780 use_plural: false,
26781 }),
26782 }));
26783 Ok(Expression::Add(Box::new(BinaryOp::new(
26784 cast_date, interval,
26785 ))))
26786 }
26787 DialectType::Snowflake => {
26788 // Keep ADD_MONTHS when source is also Snowflake
26789 if matches!(source, DialectType::Snowflake) {
26790 Ok(Expression::Function(Box::new(Function::new(
26791 "ADD_MONTHS".to_string(),
26792 vec![date, val],
26793 ))))
26794 } else {
26795 Ok(Expression::Function(Box::new(Function::new(
26796 "DATEADD".to_string(),
26797 vec![
26798 Expression::Identifier(Identifier::new("MONTH")),
26799 val,
26800 date,
26801 ],
26802 ))))
26803 }
26804 }
26805 DialectType::Redshift => {
26806 Ok(Expression::Function(Box::new(Function::new(
26807 "DATEADD".to_string(),
26808 vec![
26809 Expression::Identifier(Identifier::new("MONTH")),
26810 val,
26811 date,
26812 ],
26813 ))))
26814 }
26815 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26816 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26817 {
26818 Expression::Cast(Box::new(Cast {
26819 this: date,
26820 to: DataType::Timestamp {
26821 precision: None,
26822 timezone: false,
26823 },
26824 trailing_comments: Vec::new(),
26825 double_colon_syntax: false,
26826 format: None,
26827 default: None,
26828 inferred_type: None,
26829 }))
26830 } else {
26831 date
26832 };
26833 Ok(Expression::Function(Box::new(Function::new(
26834 "DATE_ADD".to_string(),
26835 vec![Expression::string("MONTH"), val, cast_date],
26836 ))))
26837 }
26838 DialectType::BigQuery => {
26839 let interval =
26840 Expression::Interval(Box::new(crate::expressions::Interval {
26841 this: Some(val),
26842 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26843 unit: crate::expressions::IntervalUnit::Month,
26844 use_plural: false,
26845 }),
26846 }));
26847 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26848 {
26849 Expression::Cast(Box::new(Cast {
26850 this: date,
26851 to: DataType::Custom {
26852 name: "DATETIME".to_string(),
26853 },
26854 trailing_comments: Vec::new(),
26855 double_colon_syntax: false,
26856 format: None,
26857 default: None,
26858 inferred_type: None,
26859 }))
26860 } else {
26861 date
26862 };
26863 Ok(Expression::Function(Box::new(Function::new(
26864 "DATE_ADD".to_string(),
26865 vec![cast_date, interval],
26866 ))))
26867 }
26868 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
26869 Ok(Expression::Function(Box::new(Function::new(
26870 "ADD_MONTHS".to_string(),
26871 vec![date, val],
26872 ))))
26873 }
26874 _ => {
26875 // Default: keep as AddMonths expression
26876 Ok(Expression::AddMonths(Box::new(
26877 crate::expressions::BinaryFunc {
26878 this: date,
26879 expression: val,
26880 original_name: None,
26881 inferred_type: None,
26882 },
26883 )))
26884 }
26885 }
26886 } else {
26887 Ok(e)
26888 }
26889 }
26890
26891 Action::PercentileContConvert => {
26892 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
26893 // Presto/Trino: APPROX_PERCENTILE(col, p)
26894 // Spark/Databricks: PERCENTILE_APPROX(col, p)
26895 if let Expression::WithinGroup(wg) = e {
26896 // Extract percentile value and order by column
26897 let (percentile, _is_disc) = match &wg.this {
26898 Expression::Function(f) => {
26899 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
26900 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
26901 Box::new(Literal::Number("0.5".to_string())),
26902 ));
26903 (pct, is_disc)
26904 }
26905 Expression::AggregateFunction(af) => {
26906 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
26907 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
26908 Box::new(Literal::Number("0.5".to_string())),
26909 ));
26910 (pct, is_disc)
26911 }
26912 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
26913 _ => return Ok(Expression::WithinGroup(wg)),
26914 };
26915 let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
26916 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
26917 );
26918
26919 let func_name = match target {
26920 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26921 "APPROX_PERCENTILE"
26922 }
26923 _ => "PERCENTILE_APPROX", // Spark, Databricks
26924 };
26925 Ok(Expression::Function(Box::new(Function::new(
26926 func_name.to_string(),
26927 vec![col, percentile],
26928 ))))
26929 } else {
26930 Ok(e)
26931 }
26932 }
26933
26934 Action::CurrentUserSparkParens => {
26935 // CURRENT_USER -> CURRENT_USER() for Spark
26936 if let Expression::CurrentUser(_) = e {
26937 Ok(Expression::Function(Box::new(Function::new(
26938 "CURRENT_USER".to_string(),
26939 vec![],
26940 ))))
26941 } else {
26942 Ok(e)
26943 }
26944 }
26945
26946 Action::SparkDateFuncCast => {
26947 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
26948 let cast_arg = |arg: Expression| -> Expression {
26949 match target {
26950 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26951 Self::double_cast_timestamp_date(arg)
26952 }
26953 _ => {
26954 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
26955 Self::ensure_cast_date(arg)
26956 }
26957 }
26958 };
26959 match e {
26960 Expression::Month(f) => Ok(Expression::Month(Box::new(
26961 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26962 ))),
26963 Expression::Year(f) => Ok(Expression::Year(Box::new(
26964 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26965 ))),
26966 Expression::Day(f) => Ok(Expression::Day(Box::new(
26967 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26968 ))),
26969 other => Ok(other),
26970 }
26971 }
26972
26973 Action::MapFromArraysConvert => {
26974 // Expression::MapFromArrays -> target-specific
26975 if let Expression::MapFromArrays(mfa) = e {
26976 let keys = mfa.this;
26977 let values = mfa.expression;
26978 match target {
26979 DialectType::Snowflake => Ok(Expression::Function(Box::new(
26980 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
26981 ))),
26982 _ => {
26983 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
26984 Ok(Expression::Function(Box::new(Function::new(
26985 "MAP".to_string(),
26986 vec![keys, values],
26987 ))))
26988 }
26989 }
26990 } else {
26991 Ok(e)
26992 }
26993 }
26994
26995 Action::AnyToExists => {
26996 if let Expression::Any(q) = e {
26997 if let Some(op) = q.op.clone() {
26998 let lambda_param = crate::expressions::Identifier::new("x");
26999 let rhs = Expression::Identifier(lambda_param.clone());
27000 let body = match op {
27001 crate::expressions::QuantifiedOp::Eq => {
27002 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
27003 }
27004 crate::expressions::QuantifiedOp::Neq => {
27005 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
27006 }
27007 crate::expressions::QuantifiedOp::Lt => {
27008 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
27009 }
27010 crate::expressions::QuantifiedOp::Lte => {
27011 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
27012 }
27013 crate::expressions::QuantifiedOp::Gt => {
27014 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
27015 }
27016 crate::expressions::QuantifiedOp::Gte => {
27017 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
27018 }
27019 };
27020 let lambda =
27021 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27022 parameters: vec![lambda_param],
27023 body,
27024 colon: false,
27025 parameter_types: Vec::new(),
27026 }));
27027 Ok(Expression::Function(Box::new(Function::new(
27028 "EXISTS".to_string(),
27029 vec![q.subquery, lambda],
27030 ))))
27031 } else {
27032 Ok(Expression::Any(q))
27033 }
27034 } else {
27035 Ok(e)
27036 }
27037 }
27038
27039 Action::GenerateSeriesConvert => {
27040 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
27041 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
27042 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
27043 if let Expression::Function(f) = e {
27044 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
27045 let start = f.args[0].clone();
27046 let end = f.args[1].clone();
27047 let step = f.args.get(2).cloned();
27048
27049 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
27050 let step = step.map(|s| Self::normalize_interval_string(s, target));
27051
27052 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
27053 let maybe_cast_timestamp = |arg: Expression| -> Expression {
27054 if matches!(
27055 target,
27056 DialectType::Presto
27057 | DialectType::Trino
27058 | DialectType::Athena
27059 | DialectType::Spark
27060 | DialectType::Databricks
27061 | DialectType::Hive
27062 ) {
27063 match &arg {
27064 Expression::CurrentTimestamp(_) => {
27065 Expression::Cast(Box::new(Cast {
27066 this: arg,
27067 to: DataType::Timestamp {
27068 precision: None,
27069 timezone: false,
27070 },
27071 trailing_comments: Vec::new(),
27072 double_colon_syntax: false,
27073 format: None,
27074 default: None,
27075 inferred_type: None,
27076 }))
27077 }
27078 _ => arg,
27079 }
27080 } else {
27081 arg
27082 }
27083 };
27084
27085 let start = maybe_cast_timestamp(start);
27086 let end = maybe_cast_timestamp(end);
27087
27088 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
27089 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
27090 let mut gs_args = vec![start, end];
27091 if let Some(step) = step {
27092 gs_args.push(step);
27093 }
27094 return Ok(Expression::Function(Box::new(Function::new(
27095 "GENERATE_SERIES".to_string(),
27096 gs_args,
27097 ))));
27098 }
27099
27100 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
27101 if matches!(target, DialectType::DuckDB) {
27102 let mut gs_args = vec![start, end];
27103 if let Some(step) = step {
27104 gs_args.push(step);
27105 }
27106 let gs = Expression::Function(Box::new(Function::new(
27107 "GENERATE_SERIES".to_string(),
27108 gs_args,
27109 )));
27110 return Ok(Expression::Function(Box::new(Function::new(
27111 "UNNEST".to_string(),
27112 vec![gs],
27113 ))));
27114 }
27115
27116 let mut seq_args = vec![start, end];
27117 if let Some(step) = step {
27118 seq_args.push(step);
27119 }
27120
27121 let seq = Expression::Function(Box::new(Function::new(
27122 "SEQUENCE".to_string(),
27123 seq_args,
27124 )));
27125
27126 match target {
27127 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27128 // Wrap in UNNEST
27129 Ok(Expression::Function(Box::new(Function::new(
27130 "UNNEST".to_string(),
27131 vec![seq],
27132 ))))
27133 }
27134 DialectType::Spark
27135 | DialectType::Databricks
27136 | DialectType::Hive => {
27137 // Wrap in EXPLODE
27138 Ok(Expression::Function(Box::new(Function::new(
27139 "EXPLODE".to_string(),
27140 vec![seq],
27141 ))))
27142 }
27143 _ => {
27144 // Just SEQUENCE for others
27145 Ok(seq)
27146 }
27147 }
27148 } else {
27149 Ok(Expression::Function(f))
27150 }
27151 } else {
27152 Ok(e)
27153 }
27154 }
27155
27156 Action::ConcatCoalesceWrap => {
27157 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
27158 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
27159 if let Expression::Function(f) = e {
27160 if f.name.eq_ignore_ascii_case("CONCAT") {
27161 let new_args: Vec<Expression> = f
27162 .args
27163 .into_iter()
27164 .map(|arg| {
27165 let cast_arg = if matches!(
27166 target,
27167 DialectType::Presto
27168 | DialectType::Trino
27169 | DialectType::Athena
27170 ) {
27171 Expression::Cast(Box::new(Cast {
27172 this: arg,
27173 to: DataType::VarChar {
27174 length: None,
27175 parenthesized_length: false,
27176 },
27177 trailing_comments: Vec::new(),
27178 double_colon_syntax: false,
27179 format: None,
27180 default: None,
27181 inferred_type: None,
27182 }))
27183 } else {
27184 arg
27185 };
27186 Expression::Function(Box::new(Function::new(
27187 "COALESCE".to_string(),
27188 vec![cast_arg, Expression::string("")],
27189 )))
27190 })
27191 .collect();
27192 Ok(Expression::Function(Box::new(Function::new(
27193 "CONCAT".to_string(),
27194 new_args,
27195 ))))
27196 } else {
27197 Ok(Expression::Function(f))
27198 }
27199 } else {
27200 Ok(e)
27201 }
27202 }
27203
27204 Action::PipeConcatToConcat => {
27205 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
27206 if let Expression::Concat(op) = e {
27207 let cast_left = Expression::Cast(Box::new(Cast {
27208 this: op.left,
27209 to: DataType::VarChar {
27210 length: None,
27211 parenthesized_length: false,
27212 },
27213 trailing_comments: Vec::new(),
27214 double_colon_syntax: false,
27215 format: None,
27216 default: None,
27217 inferred_type: None,
27218 }));
27219 let cast_right = Expression::Cast(Box::new(Cast {
27220 this: op.right,
27221 to: DataType::VarChar {
27222 length: None,
27223 parenthesized_length: false,
27224 },
27225 trailing_comments: Vec::new(),
27226 double_colon_syntax: false,
27227 format: None,
27228 default: None,
27229 inferred_type: None,
27230 }));
27231 Ok(Expression::Function(Box::new(Function::new(
27232 "CONCAT".to_string(),
27233 vec![cast_left, cast_right],
27234 ))))
27235 } else {
27236 Ok(e)
27237 }
27238 }
27239
27240 Action::DivFuncConvert => {
27241 // DIV(a, b) -> target-specific integer division
27242 if let Expression::Function(f) = e {
27243 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
27244 let a = f.args[0].clone();
27245 let b = f.args[1].clone();
27246 match target {
27247 DialectType::DuckDB => {
27248 // DIV(a, b) -> CAST(a // b AS DECIMAL)
27249 let int_div = Expression::IntDiv(Box::new(
27250 crate::expressions::BinaryFunc {
27251 this: a,
27252 expression: b,
27253 original_name: None,
27254 inferred_type: None,
27255 },
27256 ));
27257 Ok(Expression::Cast(Box::new(Cast {
27258 this: int_div,
27259 to: DataType::Decimal {
27260 precision: None,
27261 scale: None,
27262 },
27263 trailing_comments: Vec::new(),
27264 double_colon_syntax: false,
27265 format: None,
27266 default: None,
27267 inferred_type: None,
27268 })))
27269 }
27270 DialectType::BigQuery => {
27271 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
27272 let div_func = Expression::Function(Box::new(Function::new(
27273 "DIV".to_string(),
27274 vec![a, b],
27275 )));
27276 Ok(Expression::Cast(Box::new(Cast {
27277 this: div_func,
27278 to: DataType::Custom {
27279 name: "NUMERIC".to_string(),
27280 },
27281 trailing_comments: Vec::new(),
27282 double_colon_syntax: false,
27283 format: None,
27284 default: None,
27285 inferred_type: None,
27286 })))
27287 }
27288 DialectType::SQLite => {
27289 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
27290 let cast_a = Expression::Cast(Box::new(Cast {
27291 this: a,
27292 to: DataType::Custom {
27293 name: "REAL".to_string(),
27294 },
27295 trailing_comments: Vec::new(),
27296 double_colon_syntax: false,
27297 format: None,
27298 default: None,
27299 inferred_type: None,
27300 }));
27301 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
27302 let cast_int = Expression::Cast(Box::new(Cast {
27303 this: div,
27304 to: DataType::Int {
27305 length: None,
27306 integer_spelling: true,
27307 },
27308 trailing_comments: Vec::new(),
27309 double_colon_syntax: false,
27310 format: None,
27311 default: None,
27312 inferred_type: None,
27313 }));
27314 Ok(Expression::Cast(Box::new(Cast {
27315 this: cast_int,
27316 to: DataType::Custom {
27317 name: "REAL".to_string(),
27318 },
27319 trailing_comments: Vec::new(),
27320 double_colon_syntax: false,
27321 format: None,
27322 default: None,
27323 inferred_type: None,
27324 })))
27325 }
27326 _ => Ok(Expression::Function(f)),
27327 }
27328 } else {
27329 Ok(Expression::Function(f))
27330 }
27331 } else {
27332 Ok(e)
27333 }
27334 }
27335
27336 Action::JsonObjectAggConvert => {
27337 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
27338 match e {
27339 Expression::Function(f) => Ok(Expression::Function(Box::new(
27340 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
27341 ))),
27342 Expression::AggregateFunction(af) => {
27343 // AggregateFunction stores all args in the `args` vec
27344 Ok(Expression::Function(Box::new(Function::new(
27345 "JSON_GROUP_OBJECT".to_string(),
27346 af.args,
27347 ))))
27348 }
27349 other => Ok(other),
27350 }
27351 }
27352
27353 Action::JsonbExistsConvert => {
27354 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
27355 if let Expression::Function(f) = e {
27356 if f.args.len() == 2 {
27357 let json_expr = f.args[0].clone();
27358 let key = match &f.args[1] {
27359 Expression::Literal(lit)
27360 if matches!(
27361 lit.as_ref(),
27362 crate::expressions::Literal::String(_)
27363 ) =>
27364 {
27365 let crate::expressions::Literal::String(s) = lit.as_ref()
27366 else {
27367 unreachable!()
27368 };
27369 format!("$.{}", s)
27370 }
27371 _ => return Ok(Expression::Function(f)),
27372 };
27373 Ok(Expression::Function(Box::new(Function::new(
27374 "JSON_EXISTS".to_string(),
27375 vec![json_expr, Expression::string(&key)],
27376 ))))
27377 } else {
27378 Ok(Expression::Function(f))
27379 }
27380 } else {
27381 Ok(e)
27382 }
27383 }
27384
27385 Action::DateBinConvert => {
27386 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
27387 if let Expression::Function(f) = e {
27388 Ok(Expression::Function(Box::new(Function::new(
27389 "TIME_BUCKET".to_string(),
27390 f.args,
27391 ))))
27392 } else {
27393 Ok(e)
27394 }
27395 }
27396
27397 Action::MysqlCastCharToText => {
27398 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
27399 if let Expression::Cast(mut c) = e {
27400 c.to = DataType::Text;
27401 Ok(Expression::Cast(c))
27402 } else {
27403 Ok(e)
27404 }
27405 }
27406
27407 Action::SparkCastVarcharToString => {
27408 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
27409 match e {
27410 Expression::Cast(mut c) => {
27411 c.to = Self::normalize_varchar_to_string(c.to);
27412 Ok(Expression::Cast(c))
27413 }
27414 Expression::TryCast(mut c) => {
27415 c.to = Self::normalize_varchar_to_string(c.to);
27416 Ok(Expression::TryCast(c))
27417 }
27418 _ => Ok(e),
27419 }
27420 }
27421
27422 Action::MinMaxToLeastGreatest => {
27423 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
27424 if let Expression::Function(f) = e {
27425 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
27426 "LEAST"
27427 } else if f.name.eq_ignore_ascii_case("MAX") {
27428 "GREATEST"
27429 } else {
27430 return Ok(Expression::Function(f));
27431 };
27432 Ok(Expression::Function(Box::new(Function::new(
27433 new_name.to_string(),
27434 f.args,
27435 ))))
27436 } else {
27437 Ok(e)
27438 }
27439 }
27440
27441 Action::ClickHouseUniqToApproxCountDistinct => {
27442 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
27443 if let Expression::Function(f) = e {
27444 Ok(Expression::Function(Box::new(Function::new(
27445 "APPROX_COUNT_DISTINCT".to_string(),
27446 f.args,
27447 ))))
27448 } else {
27449 Ok(e)
27450 }
27451 }
27452
27453 Action::ClickHouseAnyToAnyValue => {
27454 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
27455 if let Expression::Function(f) = e {
27456 Ok(Expression::Function(Box::new(Function::new(
27457 "ANY_VALUE".to_string(),
27458 f.args,
27459 ))))
27460 } else {
27461 Ok(e)
27462 }
27463 }
27464
27465 Action::OracleVarchar2ToVarchar => {
27466 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
27467 if let Expression::DataType(DataType::Custom { ref name }) = e {
27468 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
27469 let starts_varchar2 =
27470 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
27471 let starts_nvarchar2 =
27472 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
27473 let inner = if starts_varchar2 || starts_nvarchar2 {
27474 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
27475 let end = name.len() - 1; // skip trailing ")"
27476 Some(&name[start..end])
27477 } else {
27478 Option::None
27479 };
27480 if let Some(inner_str) = inner {
27481 // Parse the number part, ignoring BYTE/CHAR qualifier
27482 let num_str = inner_str.split_whitespace().next().unwrap_or("");
27483 if let Ok(n) = num_str.parse::<u32>() {
27484 Ok(Expression::DataType(DataType::VarChar {
27485 length: Some(n),
27486 parenthesized_length: false,
27487 }))
27488 } else {
27489 Ok(e)
27490 }
27491 } else {
27492 // Plain VARCHAR2 / NVARCHAR2 without parens
27493 Ok(Expression::DataType(DataType::VarChar {
27494 length: Option::None,
27495 parenthesized_length: false,
27496 }))
27497 }
27498 } else {
27499 Ok(e)
27500 }
27501 }
27502
27503 Action::Nvl2Expand => {
27504 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
27505 // But keep as NVL2 for dialects that support it natively
27506 let nvl2_native = matches!(
27507 target,
27508 DialectType::Oracle
27509 | DialectType::Snowflake
27510 | DialectType::Redshift
27511 | DialectType::Teradata
27512 | DialectType::Spark
27513 | DialectType::Databricks
27514 );
27515 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
27516 if nvl2_native {
27517 return Ok(Expression::Nvl2(nvl2));
27518 }
27519 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
27520 } else if let Expression::Function(f) = e {
27521 if nvl2_native {
27522 return Ok(Expression::Function(Box::new(Function::new(
27523 "NVL2".to_string(),
27524 f.args,
27525 ))));
27526 }
27527 if f.args.len() < 2 {
27528 return Ok(Expression::Function(f));
27529 }
27530 let mut args = f.args;
27531 let a = args.remove(0);
27532 let b = args.remove(0);
27533 let c = if !args.is_empty() {
27534 Some(args.remove(0))
27535 } else {
27536 Option::None
27537 };
27538 (a, b, c)
27539 } else {
27540 return Ok(e);
27541 };
27542 // Build: NOT (a IS NULL)
27543 let is_null = Expression::IsNull(Box::new(IsNull {
27544 this: a,
27545 not: false,
27546 postfix_form: false,
27547 }));
27548 let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
27549 this: is_null,
27550 inferred_type: None,
27551 }));
27552 Ok(Expression::Case(Box::new(Case {
27553 operand: Option::None,
27554 whens: vec![(not_null, b)],
27555 else_: c,
27556 comments: Vec::new(),
27557 inferred_type: None,
27558 })))
27559 }
27560
27561 Action::IfnullToCoalesce => {
27562 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
27563 if let Expression::Coalesce(mut cf) = e {
27564 cf.original_name = Option::None;
27565 Ok(Expression::Coalesce(cf))
27566 } else if let Expression::Function(f) = e {
27567 Ok(Expression::Function(Box::new(Function::new(
27568 "COALESCE".to_string(),
27569 f.args,
27570 ))))
27571 } else {
27572 Ok(e)
27573 }
27574 }
27575
27576 Action::IsAsciiConvert => {
27577 // IS_ASCII(x) -> dialect-specific ASCII check
27578 if let Expression::Function(f) = e {
27579 let arg = f.args.into_iter().next().unwrap();
27580 match target {
27581 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
27582 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
27583 Ok(Expression::Function(Box::new(Function::new(
27584 "REGEXP_LIKE".to_string(),
27585 vec![
27586 arg,
27587 Expression::Literal(Box::new(Literal::String(
27588 "^[[:ascii:]]*$".to_string(),
27589 ))),
27590 ],
27591 ))))
27592 }
27593 DialectType::PostgreSQL
27594 | DialectType::Redshift
27595 | DialectType::Materialize
27596 | DialectType::RisingWave => {
27597 // (x ~ '^[[:ascii:]]*$')
27598 Ok(Expression::Paren(Box::new(Paren {
27599 this: Expression::RegexpLike(Box::new(
27600 crate::expressions::RegexpFunc {
27601 this: arg,
27602 pattern: Expression::Literal(Box::new(
27603 Literal::String("^[[:ascii:]]*$".to_string()),
27604 )),
27605 flags: Option::None,
27606 },
27607 )),
27608 trailing_comments: Vec::new(),
27609 })))
27610 }
27611 DialectType::SQLite => {
27612 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
27613 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
27614 "2a5b5e012d7f5d2a".to_string(),
27615 )));
27616 let cast_expr = Expression::Cast(Box::new(Cast {
27617 this: hex_lit,
27618 to: DataType::Text,
27619 trailing_comments: Vec::new(),
27620 double_colon_syntax: false,
27621 format: Option::None,
27622 default: Option::None,
27623 inferred_type: None,
27624 }));
27625 let glob = Expression::Glob(Box::new(BinaryOp {
27626 left: arg,
27627 right: cast_expr,
27628 left_comments: Vec::new(),
27629 operator_comments: Vec::new(),
27630 trailing_comments: Vec::new(),
27631 inferred_type: None,
27632 }));
27633 Ok(Expression::Paren(Box::new(Paren {
27634 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
27635 this: glob,
27636 inferred_type: None,
27637 })),
27638 trailing_comments: Vec::new(),
27639 })))
27640 }
27641 DialectType::TSQL | DialectType::Fabric => {
27642 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
27643 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
27644 "255b5e002d7f5d25".to_string(),
27645 )));
27646 let convert_expr = Expression::Convert(Box::new(
27647 crate::expressions::ConvertFunc {
27648 this: hex_lit,
27649 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
27650 style: None,
27651 },
27652 ));
27653 let collated = Expression::Collation(Box::new(
27654 crate::expressions::CollationExpr {
27655 this: convert_expr,
27656 collation: "Latin1_General_BIN".to_string(),
27657 quoted: false,
27658 double_quoted: false,
27659 },
27660 ));
27661 let patindex = Expression::Function(Box::new(Function::new(
27662 "PATINDEX".to_string(),
27663 vec![collated, arg],
27664 )));
27665 let zero =
27666 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27667 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27668 left: patindex,
27669 right: zero,
27670 left_comments: Vec::new(),
27671 operator_comments: Vec::new(),
27672 trailing_comments: Vec::new(),
27673 inferred_type: None,
27674 }));
27675 Ok(Expression::Paren(Box::new(Paren {
27676 this: eq_zero,
27677 trailing_comments: Vec::new(),
27678 })))
27679 }
27680 DialectType::Oracle => {
27681 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
27682 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27683 let s1 = Expression::Literal(Box::new(Literal::String(
27684 "^[".to_string(),
27685 )));
27686 let chr1 = Expression::Function(Box::new(Function::new(
27687 "CHR".to_string(),
27688 vec![Expression::Literal(Box::new(Literal::Number(
27689 "1".to_string(),
27690 )))],
27691 )));
27692 let dash =
27693 Expression::Literal(Box::new(Literal::String("-".to_string())));
27694 let chr127 = Expression::Function(Box::new(Function::new(
27695 "CHR".to_string(),
27696 vec![Expression::Literal(Box::new(Literal::Number(
27697 "127".to_string(),
27698 )))],
27699 )));
27700 let s2 = Expression::Literal(Box::new(Literal::String(
27701 "]*$".to_string(),
27702 )));
27703 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27704 let concat1 =
27705 Expression::DPipe(Box::new(crate::expressions::DPipe {
27706 this: Box::new(s1),
27707 expression: Box::new(chr1),
27708 safe: None,
27709 }));
27710 let concat2 =
27711 Expression::DPipe(Box::new(crate::expressions::DPipe {
27712 this: Box::new(concat1),
27713 expression: Box::new(dash),
27714 safe: None,
27715 }));
27716 let concat3 =
27717 Expression::DPipe(Box::new(crate::expressions::DPipe {
27718 this: Box::new(concat2),
27719 expression: Box::new(chr127),
27720 safe: None,
27721 }));
27722 let concat4 =
27723 Expression::DPipe(Box::new(crate::expressions::DPipe {
27724 this: Box::new(concat3),
27725 expression: Box::new(s2),
27726 safe: None,
27727 }));
27728 let regexp_like = Expression::Function(Box::new(Function::new(
27729 "REGEXP_LIKE".to_string(),
27730 vec![arg, concat4],
27731 )));
27732 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
27733 let true_expr =
27734 Expression::Column(Box::new(crate::expressions::Column {
27735 name: Identifier {
27736 name: "TRUE".to_string(),
27737 quoted: false,
27738 trailing_comments: Vec::new(),
27739 span: None,
27740 },
27741 table: None,
27742 join_mark: false,
27743 trailing_comments: Vec::new(),
27744 span: None,
27745 inferred_type: None,
27746 }));
27747 let nvl = Expression::Function(Box::new(Function::new(
27748 "NVL".to_string(),
27749 vec![regexp_like, true_expr],
27750 )));
27751 Ok(nvl)
27752 }
27753 _ => Ok(Expression::Function(Box::new(Function::new(
27754 "IS_ASCII".to_string(),
27755 vec![arg],
27756 )))),
27757 }
27758 } else {
27759 Ok(e)
27760 }
27761 }
27762
27763 Action::StrPositionConvert => {
27764 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
27765 if let Expression::Function(f) = e {
27766 if f.args.len() < 2 {
27767 return Ok(Expression::Function(f));
27768 }
27769 let mut args = f.args;
27770
27771 let haystack = args.remove(0);
27772 let needle = args.remove(0);
27773 let position = if !args.is_empty() {
27774 Some(args.remove(0))
27775 } else {
27776 Option::None
27777 };
27778 let occurrence = if !args.is_empty() {
27779 Some(args.remove(0))
27780 } else {
27781 Option::None
27782 };
27783
27784 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
27785 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
27786 fn build_position_expansion(
27787 haystack: Expression,
27788 needle: Expression,
27789 pos: Expression,
27790 occurrence: Option<Expression>,
27791 inner_func: &str,
27792 wrapper: &str, // "CASE", "IF", "IIF"
27793 ) -> Expression {
27794 let substr = Expression::Function(Box::new(Function::new(
27795 "SUBSTRING".to_string(),
27796 vec![haystack, pos.clone()],
27797 )));
27798 let mut inner_args = vec![substr, needle];
27799 if let Some(occ) = occurrence {
27800 inner_args.push(occ);
27801 }
27802 let inner_call = Expression::Function(Box::new(Function::new(
27803 inner_func.to_string(),
27804 inner_args,
27805 )));
27806 let zero =
27807 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27808 let one =
27809 Expression::Literal(Box::new(Literal::Number("1".to_string())));
27810 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27811 left: inner_call.clone(),
27812 right: zero.clone(),
27813 left_comments: Vec::new(),
27814 operator_comments: Vec::new(),
27815 trailing_comments: Vec::new(),
27816 inferred_type: None,
27817 }));
27818 let add_pos = Expression::Add(Box::new(BinaryOp {
27819 left: inner_call,
27820 right: pos,
27821 left_comments: Vec::new(),
27822 operator_comments: Vec::new(),
27823 trailing_comments: Vec::new(),
27824 inferred_type: None,
27825 }));
27826 let sub_one = Expression::Sub(Box::new(BinaryOp {
27827 left: add_pos,
27828 right: one,
27829 left_comments: Vec::new(),
27830 operator_comments: Vec::new(),
27831 trailing_comments: Vec::new(),
27832 inferred_type: None,
27833 }));
27834
27835 match wrapper {
27836 "CASE" => Expression::Case(Box::new(Case {
27837 operand: Option::None,
27838 whens: vec![(eq_zero, zero)],
27839 else_: Some(sub_one),
27840 comments: Vec::new(),
27841 inferred_type: None,
27842 })),
27843 "IIF" => Expression::Function(Box::new(Function::new(
27844 "IIF".to_string(),
27845 vec![eq_zero, zero, sub_one],
27846 ))),
27847 _ => Expression::Function(Box::new(Function::new(
27848 "IF".to_string(),
27849 vec![eq_zero, zero, sub_one],
27850 ))),
27851 }
27852 }
27853
27854 match target {
27855 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
27856 DialectType::Athena
27857 | DialectType::DuckDB
27858 | DialectType::Presto
27859 | DialectType::Trino
27860 | DialectType::Drill => {
27861 if let Some(pos) = position {
27862 let wrapper = if matches!(target, DialectType::DuckDB) {
27863 "CASE"
27864 } else {
27865 "IF"
27866 };
27867 let result = build_position_expansion(
27868 haystack, needle, pos, occurrence, "STRPOS", wrapper,
27869 );
27870 if matches!(target, DialectType::Drill) {
27871 // Drill uses backtick-quoted `IF`
27872 if let Expression::Function(mut f) = result {
27873 f.name = "`IF`".to_string();
27874 Ok(Expression::Function(f))
27875 } else {
27876 Ok(result)
27877 }
27878 } else {
27879 Ok(result)
27880 }
27881 } else {
27882 Ok(Expression::Function(Box::new(Function::new(
27883 "STRPOS".to_string(),
27884 vec![haystack, needle],
27885 ))))
27886 }
27887 }
27888 // SQLite: IIF wrapper
27889 DialectType::SQLite => {
27890 if let Some(pos) = position {
27891 Ok(build_position_expansion(
27892 haystack, needle, pos, occurrence, "INSTR", "IIF",
27893 ))
27894 } else {
27895 Ok(Expression::Function(Box::new(Function::new(
27896 "INSTR".to_string(),
27897 vec![haystack, needle],
27898 ))))
27899 }
27900 }
27901 // INSTR group: Teradata, BigQuery, Oracle
27902 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
27903 let mut a = vec![haystack, needle];
27904 if let Some(pos) = position {
27905 a.push(pos);
27906 }
27907 if let Some(occ) = occurrence {
27908 a.push(occ);
27909 }
27910 Ok(Expression::Function(Box::new(Function::new(
27911 "INSTR".to_string(),
27912 a,
27913 ))))
27914 }
27915 // CHARINDEX group: Snowflake, TSQL
27916 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
27917 let mut a = vec![needle, haystack];
27918 if let Some(pos) = position {
27919 a.push(pos);
27920 }
27921 Ok(Expression::Function(Box::new(Function::new(
27922 "CHARINDEX".to_string(),
27923 a,
27924 ))))
27925 }
27926 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
27927 DialectType::PostgreSQL
27928 | DialectType::Materialize
27929 | DialectType::RisingWave
27930 | DialectType::Redshift => {
27931 if let Some(pos) = position {
27932 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
27933 // ELSE POSITION(...) + pos - 1 END
27934 let substr = Expression::Substring(Box::new(
27935 crate::expressions::SubstringFunc {
27936 this: haystack,
27937 start: pos.clone(),
27938 length: Option::None,
27939 from_for_syntax: true,
27940 },
27941 ));
27942 let pos_in = Expression::StrPosition(Box::new(
27943 crate::expressions::StrPosition {
27944 this: Box::new(substr),
27945 substr: Some(Box::new(needle)),
27946 position: Option::None,
27947 occurrence: Option::None,
27948 },
27949 ));
27950 let zero = Expression::Literal(Box::new(Literal::Number(
27951 "0".to_string(),
27952 )));
27953 let one = Expression::Literal(Box::new(Literal::Number(
27954 "1".to_string(),
27955 )));
27956 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27957 left: pos_in.clone(),
27958 right: zero.clone(),
27959 left_comments: Vec::new(),
27960 operator_comments: Vec::new(),
27961 trailing_comments: Vec::new(),
27962 inferred_type: None,
27963 }));
27964 let add_pos = Expression::Add(Box::new(BinaryOp {
27965 left: pos_in,
27966 right: pos,
27967 left_comments: Vec::new(),
27968 operator_comments: Vec::new(),
27969 trailing_comments: Vec::new(),
27970 inferred_type: None,
27971 }));
27972 let sub_one = Expression::Sub(Box::new(BinaryOp {
27973 left: add_pos,
27974 right: one,
27975 left_comments: Vec::new(),
27976 operator_comments: Vec::new(),
27977 trailing_comments: Vec::new(),
27978 inferred_type: None,
27979 }));
27980 Ok(Expression::Case(Box::new(Case {
27981 operand: Option::None,
27982 whens: vec![(eq_zero, zero)],
27983 else_: Some(sub_one),
27984 comments: Vec::new(),
27985 inferred_type: None,
27986 })))
27987 } else {
27988 Ok(Expression::StrPosition(Box::new(
27989 crate::expressions::StrPosition {
27990 this: Box::new(haystack),
27991 substr: Some(Box::new(needle)),
27992 position: Option::None,
27993 occurrence: Option::None,
27994 },
27995 )))
27996 }
27997 }
27998 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
27999 DialectType::MySQL
28000 | DialectType::SingleStore
28001 | DialectType::TiDB
28002 | DialectType::Hive
28003 | DialectType::Spark
28004 | DialectType::Databricks
28005 | DialectType::Doris
28006 | DialectType::StarRocks => {
28007 let mut a = vec![needle, haystack];
28008 if let Some(pos) = position {
28009 a.push(pos);
28010 }
28011 Ok(Expression::Function(Box::new(Function::new(
28012 "LOCATE".to_string(),
28013 a,
28014 ))))
28015 }
28016 // ClickHouse: POSITION(haystack, needle[, position])
28017 DialectType::ClickHouse => {
28018 let mut a = vec![haystack, needle];
28019 if let Some(pos) = position {
28020 a.push(pos);
28021 }
28022 Ok(Expression::Function(Box::new(Function::new(
28023 "POSITION".to_string(),
28024 a,
28025 ))))
28026 }
28027 _ => {
28028 let mut a = vec![haystack, needle];
28029 if let Some(pos) = position {
28030 a.push(pos);
28031 }
28032 if let Some(occ) = occurrence {
28033 a.push(occ);
28034 }
28035 Ok(Expression::Function(Box::new(Function::new(
28036 "STR_POSITION".to_string(),
28037 a,
28038 ))))
28039 }
28040 }
28041 } else {
28042 Ok(e)
28043 }
28044 }
28045
28046 Action::ArraySumConvert => {
28047 // ARRAY_SUM(arr) -> dialect-specific
28048 if let Expression::Function(f) = e {
28049 let args = f.args;
28050 match target {
28051 DialectType::DuckDB => Ok(Expression::Function(Box::new(
28052 Function::new("LIST_SUM".to_string(), args),
28053 ))),
28054 DialectType::Spark | DialectType::Databricks => {
28055 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
28056 let arr = args.into_iter().next().unwrap();
28057 let zero =
28058 Expression::Literal(Box::new(Literal::Number("0".to_string())));
28059 let acc_id = Identifier::new("acc");
28060 let x_id = Identifier::new("x");
28061 let acc = Expression::Identifier(acc_id.clone());
28062 let x = Expression::Identifier(x_id.clone());
28063 let add = Expression::Add(Box::new(BinaryOp {
28064 left: acc.clone(),
28065 right: x,
28066 left_comments: Vec::new(),
28067 operator_comments: Vec::new(),
28068 trailing_comments: Vec::new(),
28069 inferred_type: None,
28070 }));
28071 let lambda1 =
28072 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28073 parameters: vec![acc_id.clone(), x_id],
28074 body: add,
28075 colon: false,
28076 parameter_types: Vec::new(),
28077 }));
28078 let lambda2 =
28079 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28080 parameters: vec![acc_id],
28081 body: acc,
28082 colon: false,
28083 parameter_types: Vec::new(),
28084 }));
28085 Ok(Expression::Function(Box::new(Function::new(
28086 "AGGREGATE".to_string(),
28087 vec![arr, zero, lambda1, lambda2],
28088 ))))
28089 }
28090 DialectType::Presto | DialectType::Athena => {
28091 // Presto/Athena keep ARRAY_SUM natively
28092 Ok(Expression::Function(Box::new(Function::new(
28093 "ARRAY_SUM".to_string(),
28094 args,
28095 ))))
28096 }
28097 DialectType::Trino => {
28098 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
28099 if args.len() == 1 {
28100 let arr = args.into_iter().next().unwrap();
28101 let zero = Expression::Literal(Box::new(Literal::Number(
28102 "0".to_string(),
28103 )));
28104 let acc_id = Identifier::new("acc");
28105 let x_id = Identifier::new("x");
28106 let acc = Expression::Identifier(acc_id.clone());
28107 let x = Expression::Identifier(x_id.clone());
28108 let add = Expression::Add(Box::new(BinaryOp {
28109 left: acc.clone(),
28110 right: x,
28111 left_comments: Vec::new(),
28112 operator_comments: Vec::new(),
28113 trailing_comments: Vec::new(),
28114 inferred_type: None,
28115 }));
28116 let lambda1 = Expression::Lambda(Box::new(
28117 crate::expressions::LambdaExpr {
28118 parameters: vec![acc_id.clone(), x_id],
28119 body: add,
28120 colon: false,
28121 parameter_types: Vec::new(),
28122 },
28123 ));
28124 let lambda2 = Expression::Lambda(Box::new(
28125 crate::expressions::LambdaExpr {
28126 parameters: vec![acc_id],
28127 body: acc,
28128 colon: false,
28129 parameter_types: Vec::new(),
28130 },
28131 ));
28132 Ok(Expression::Function(Box::new(Function::new(
28133 "REDUCE".to_string(),
28134 vec![arr, zero, lambda1, lambda2],
28135 ))))
28136 } else {
28137 Ok(Expression::Function(Box::new(Function::new(
28138 "ARRAY_SUM".to_string(),
28139 args,
28140 ))))
28141 }
28142 }
28143 DialectType::ClickHouse => {
28144 // arraySum(lambda, arr) or arraySum(arr)
28145 Ok(Expression::Function(Box::new(Function::new(
28146 "arraySum".to_string(),
28147 args,
28148 ))))
28149 }
28150 _ => Ok(Expression::Function(Box::new(Function::new(
28151 "ARRAY_SUM".to_string(),
28152 args,
28153 )))),
28154 }
28155 } else {
28156 Ok(e)
28157 }
28158 }
28159
28160 Action::ArraySizeConvert => {
28161 if let Expression::Function(f) = e {
28162 Ok(Expression::Function(Box::new(Function::new(
28163 "REPEATED_COUNT".to_string(),
28164 f.args,
28165 ))))
28166 } else {
28167 Ok(e)
28168 }
28169 }
28170
28171 Action::ArrayAnyConvert => {
28172 if let Expression::Function(f) = e {
28173 let mut args = f.args;
28174 if args.len() == 2 {
28175 let arr = args.remove(0);
28176 let lambda = args.remove(0);
28177
28178 // Extract lambda parameter name and body
28179 let (param_name, pred_body) =
28180 if let Expression::Lambda(ref lam) = lambda {
28181 let name = if let Some(p) = lam.parameters.first() {
28182 p.name.clone()
28183 } else {
28184 "x".to_string()
28185 };
28186 (name, lam.body.clone())
28187 } else {
28188 ("x".to_string(), lambda.clone())
28189 };
28190
28191 // Helper: build a function call Expression
28192 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
28193 Expression::Function(Box::new(Function::new(
28194 name.to_string(),
28195 args,
28196 )))
28197 };
28198
28199 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
28200 let build_filter_pattern = |len_func: &str,
28201 len_args_extra: Vec<Expression>,
28202 filter_expr: Expression|
28203 -> Expression {
28204 // len_func(arr, ...extra) = 0
28205 let mut len_arr_args = vec![arr.clone()];
28206 len_arr_args.extend(len_args_extra.clone());
28207 let len_arr = make_func(len_func, len_arr_args);
28208 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
28209 len_arr,
28210 Expression::number(0),
28211 )));
28212
28213 // len_func(filter_expr, ...extra) <> 0
28214 let mut len_filter_args = vec![filter_expr];
28215 len_filter_args.extend(len_args_extra);
28216 let len_filter = make_func(len_func, len_filter_args);
28217 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
28218 len_filter,
28219 Expression::number(0),
28220 )));
28221
28222 // (eq_zero OR neq_zero)
28223 let or_expr =
28224 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
28225 Expression::Paren(Box::new(Paren {
28226 this: or_expr,
28227 trailing_comments: Vec::new(),
28228 }))
28229 };
28230
28231 match target {
28232 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
28233 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
28234 }
28235 DialectType::ClickHouse => {
28236 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
28237 // ClickHouse arrayFilter takes lambda first, then array
28238 let filter_expr =
28239 make_func("arrayFilter", vec![lambda, arr.clone()]);
28240 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
28241 }
28242 DialectType::Databricks | DialectType::Spark => {
28243 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
28244 let filter_expr =
28245 make_func("FILTER", vec![arr.clone(), lambda]);
28246 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
28247 }
28248 DialectType::DuckDB => {
28249 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
28250 let filter_expr =
28251 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
28252 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
28253 }
28254 DialectType::Teradata => {
28255 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
28256 let filter_expr =
28257 make_func("FILTER", vec![arr.clone(), lambda]);
28258 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
28259 }
28260 DialectType::BigQuery => {
28261 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
28262 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
28263 let param_col = Expression::column(¶m_name);
28264 let unnest_expr = Expression::Unnest(Box::new(
28265 crate::expressions::UnnestFunc {
28266 this: arr.clone(),
28267 expressions: vec![],
28268 with_ordinality: false,
28269 alias: Some(Identifier::new(¶m_name)),
28270 offset_alias: None,
28271 },
28272 ));
28273 let mut sel = crate::expressions::Select::default();
28274 sel.expressions = vec![param_col];
28275 sel.from = Some(crate::expressions::From {
28276 expressions: vec![unnest_expr],
28277 });
28278 sel.where_clause =
28279 Some(crate::expressions::Where { this: pred_body });
28280 let array_subquery =
28281 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
28282 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
28283 }
28284 DialectType::PostgreSQL => {
28285 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
28286 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
28287 let param_col = Expression::column(¶m_name);
28288 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
28289 let unnest_with_alias =
28290 Expression::Alias(Box::new(crate::expressions::Alias {
28291 this: Expression::Unnest(Box::new(
28292 crate::expressions::UnnestFunc {
28293 this: arr.clone(),
28294 expressions: vec![],
28295 with_ordinality: false,
28296 alias: None,
28297 offset_alias: None,
28298 },
28299 )),
28300 alias: Identifier::new("_t0"),
28301 column_aliases: vec![Identifier::new(¶m_name)],
28302 alias_explicit_as: false,
28303 alias_keyword: None,
28304 pre_alias_comments: Vec::new(),
28305 trailing_comments: Vec::new(),
28306 inferred_type: None,
28307 }));
28308 let mut sel = crate::expressions::Select::default();
28309 sel.expressions = vec![param_col];
28310 sel.from = Some(crate::expressions::From {
28311 expressions: vec![unnest_with_alias],
28312 });
28313 sel.where_clause =
28314 Some(crate::expressions::Where { this: pred_body });
28315 let array_subquery =
28316 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
28317 Ok(build_filter_pattern(
28318 "ARRAY_LENGTH",
28319 vec![Expression::number(1)],
28320 array_subquery,
28321 ))
28322 }
28323 _ => Ok(Expression::Function(Box::new(Function::new(
28324 "ARRAY_ANY".to_string(),
28325 vec![arr, lambda],
28326 )))),
28327 }
28328 } else {
28329 Ok(Expression::Function(Box::new(Function::new(
28330 "ARRAY_ANY".to_string(),
28331 args,
28332 ))))
28333 }
28334 } else {
28335 Ok(e)
28336 }
28337 }
28338
28339 Action::DecodeSimplify => {
28340 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
28341 // For literal search values: CASE WHEN x = search THEN result
28342 // For NULL search: CASE WHEN x IS NULL THEN result
28343 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
28344 fn is_decode_literal(e: &Expression) -> bool {
28345 matches!(
28346 e,
28347 Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
28348 )
28349 }
28350
28351 let build_decode_case =
28352 |this_expr: Expression,
28353 pairs: Vec<(Expression, Expression)>,
28354 default: Option<Expression>| {
28355 let whens: Vec<(Expression, Expression)> = pairs
28356 .into_iter()
28357 .map(|(search, result)| {
28358 if matches!(&search, Expression::Null(_)) {
28359 // NULL search -> IS NULL
28360 let condition = Expression::Is(Box::new(BinaryOp {
28361 left: this_expr.clone(),
28362 right: Expression::Null(crate::expressions::Null),
28363 left_comments: Vec::new(),
28364 operator_comments: Vec::new(),
28365 trailing_comments: Vec::new(),
28366 inferred_type: None,
28367 }));
28368 (condition, result)
28369 } else if is_decode_literal(&search)
28370 || is_decode_literal(&this_expr)
28371 {
28372 // At least one side is a literal -> simple equality (no NULL check needed)
28373 let eq = Expression::Eq(Box::new(BinaryOp {
28374 left: this_expr.clone(),
28375 right: search,
28376 left_comments: Vec::new(),
28377 operator_comments: Vec::new(),
28378 trailing_comments: Vec::new(),
28379 inferred_type: None,
28380 }));
28381 (eq, result)
28382 } else {
28383 // Non-literal -> null-safe comparison
28384 let needs_paren = matches!(
28385 &search,
28386 Expression::Eq(_)
28387 | Expression::Neq(_)
28388 | Expression::Gt(_)
28389 | Expression::Gte(_)
28390 | Expression::Lt(_)
28391 | Expression::Lte(_)
28392 );
28393 let search_ref = if needs_paren {
28394 Expression::Paren(Box::new(crate::expressions::Paren {
28395 this: search.clone(),
28396 trailing_comments: Vec::new(),
28397 }))
28398 } else {
28399 search.clone()
28400 };
28401 // Build: x = search OR (x IS NULL AND search IS NULL)
28402 let eq = Expression::Eq(Box::new(BinaryOp {
28403 left: this_expr.clone(),
28404 right: search_ref,
28405 left_comments: Vec::new(),
28406 operator_comments: Vec::new(),
28407 trailing_comments: Vec::new(),
28408 inferred_type: None,
28409 }));
28410 let search_in_null = if needs_paren {
28411 Expression::Paren(Box::new(crate::expressions::Paren {
28412 this: search.clone(),
28413 trailing_comments: Vec::new(),
28414 }))
28415 } else {
28416 search.clone()
28417 };
28418 let x_is_null = Expression::Is(Box::new(BinaryOp {
28419 left: this_expr.clone(),
28420 right: Expression::Null(crate::expressions::Null),
28421 left_comments: Vec::new(),
28422 operator_comments: Vec::new(),
28423 trailing_comments: Vec::new(),
28424 inferred_type: None,
28425 }));
28426 let search_is_null = Expression::Is(Box::new(BinaryOp {
28427 left: search_in_null,
28428 right: Expression::Null(crate::expressions::Null),
28429 left_comments: Vec::new(),
28430 operator_comments: Vec::new(),
28431 trailing_comments: Vec::new(),
28432 inferred_type: None,
28433 }));
28434 let both_null = Expression::And(Box::new(BinaryOp {
28435 left: x_is_null,
28436 right: search_is_null,
28437 left_comments: Vec::new(),
28438 operator_comments: Vec::new(),
28439 trailing_comments: Vec::new(),
28440 inferred_type: None,
28441 }));
28442 let condition = Expression::Or(Box::new(BinaryOp {
28443 left: eq,
28444 right: Expression::Paren(Box::new(
28445 crate::expressions::Paren {
28446 this: both_null,
28447 trailing_comments: Vec::new(),
28448 },
28449 )),
28450 left_comments: Vec::new(),
28451 operator_comments: Vec::new(),
28452 trailing_comments: Vec::new(),
28453 inferred_type: None,
28454 }));
28455 (condition, result)
28456 }
28457 })
28458 .collect();
28459 Expression::Case(Box::new(Case {
28460 operand: None,
28461 whens,
28462 else_: default,
28463 comments: Vec::new(),
28464 inferred_type: None,
28465 }))
28466 };
28467
28468 if let Expression::Decode(decode) = e {
28469 Ok(build_decode_case(
28470 decode.this,
28471 decode.search_results,
28472 decode.default,
28473 ))
28474 } else if let Expression::DecodeCase(dc) = e {
28475 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
28476 let mut exprs = dc.expressions;
28477 if exprs.len() < 3 {
28478 return Ok(Expression::DecodeCase(Box::new(
28479 crate::expressions::DecodeCase { expressions: exprs },
28480 )));
28481 }
28482 let this_expr = exprs.remove(0);
28483 let mut pairs = Vec::new();
28484 let mut default = None;
28485 let mut i = 0;
28486 while i + 1 < exprs.len() {
28487 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
28488 i += 2;
28489 }
28490 if i < exprs.len() {
28491 // Odd remaining element is the default
28492 default = Some(exprs[i].clone());
28493 }
28494 Ok(build_decode_case(this_expr, pairs, default))
28495 } else {
28496 Ok(e)
28497 }
28498 }
28499
28500 Action::CreateTableLikeToCtas => {
28501 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
28502 if let Expression::CreateTable(ct) = e {
28503 let like_source = ct.constraints.iter().find_map(|c| {
28504 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28505 Some(source.clone())
28506 } else {
28507 None
28508 }
28509 });
28510 if let Some(source_table) = like_source {
28511 let mut new_ct = *ct;
28512 new_ct.constraints.clear();
28513 // Build: SELECT * FROM b LIMIT 0
28514 let select = Expression::Select(Box::new(crate::expressions::Select {
28515 expressions: vec![Expression::Star(crate::expressions::Star {
28516 table: None,
28517 except: None,
28518 replace: None,
28519 rename: None,
28520 trailing_comments: Vec::new(),
28521 span: None,
28522 })],
28523 from: Some(crate::expressions::From {
28524 expressions: vec![Expression::Table(Box::new(source_table))],
28525 }),
28526 limit: Some(crate::expressions::Limit {
28527 this: Expression::Literal(Box::new(Literal::Number(
28528 "0".to_string(),
28529 ))),
28530 percent: false,
28531 comments: Vec::new(),
28532 }),
28533 ..Default::default()
28534 }));
28535 new_ct.as_select = Some(select);
28536 Ok(Expression::CreateTable(Box::new(new_ct)))
28537 } else {
28538 Ok(Expression::CreateTable(ct))
28539 }
28540 } else {
28541 Ok(e)
28542 }
28543 }
28544
28545 Action::CreateTableLikeToSelectInto => {
28546 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
28547 if let Expression::CreateTable(ct) = e {
28548 let like_source = ct.constraints.iter().find_map(|c| {
28549 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28550 Some(source.clone())
28551 } else {
28552 None
28553 }
28554 });
28555 if let Some(source_table) = like_source {
28556 let mut aliased_source = source_table;
28557 aliased_source.alias = Some(Identifier::new("temp"));
28558 // Build: SELECT TOP 0 * INTO a FROM b AS temp
28559 let select = Expression::Select(Box::new(crate::expressions::Select {
28560 expressions: vec![Expression::Star(crate::expressions::Star {
28561 table: None,
28562 except: None,
28563 replace: None,
28564 rename: None,
28565 trailing_comments: Vec::new(),
28566 span: None,
28567 })],
28568 from: Some(crate::expressions::From {
28569 expressions: vec![Expression::Table(Box::new(aliased_source))],
28570 }),
28571 into: Some(crate::expressions::SelectInto {
28572 this: Expression::Table(Box::new(ct.name.clone())),
28573 temporary: false,
28574 unlogged: false,
28575 bulk_collect: false,
28576 expressions: Vec::new(),
28577 }),
28578 top: Some(crate::expressions::Top {
28579 this: Expression::Literal(Box::new(Literal::Number(
28580 "0".to_string(),
28581 ))),
28582 percent: false,
28583 with_ties: false,
28584 parenthesized: false,
28585 }),
28586 ..Default::default()
28587 }));
28588 Ok(select)
28589 } else {
28590 Ok(Expression::CreateTable(ct))
28591 }
28592 } else {
28593 Ok(e)
28594 }
28595 }
28596
28597 Action::CreateTableLikeToAs => {
28598 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
28599 if let Expression::CreateTable(ct) = e {
28600 let like_source = ct.constraints.iter().find_map(|c| {
28601 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28602 Some(source.clone())
28603 } else {
28604 None
28605 }
28606 });
28607 if let Some(source_table) = like_source {
28608 let mut new_ct = *ct;
28609 new_ct.constraints.clear();
28610 // AS b (just a table reference, not a SELECT)
28611 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
28612 Ok(Expression::CreateTable(Box::new(new_ct)))
28613 } else {
28614 Ok(Expression::CreateTable(ct))
28615 }
28616 } else {
28617 Ok(e)
28618 }
28619 }
28620
28621 Action::TsOrDsToDateConvert => {
28622 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
28623 if let Expression::Function(f) = e {
28624 let mut args = f.args;
28625 let this = args.remove(0);
28626 let fmt = if !args.is_empty() {
28627 match &args[0] {
28628 Expression::Literal(lit)
28629 if matches!(lit.as_ref(), Literal::String(_)) =>
28630 {
28631 let Literal::String(s) = lit.as_ref() else {
28632 unreachable!()
28633 };
28634 Some(s.clone())
28635 }
28636 _ => None,
28637 }
28638 } else {
28639 None
28640 };
28641 Ok(Expression::TsOrDsToDate(Box::new(
28642 crate::expressions::TsOrDsToDate {
28643 this: Box::new(this),
28644 format: fmt,
28645 safe: None,
28646 },
28647 )))
28648 } else {
28649 Ok(e)
28650 }
28651 }
28652
28653 Action::TsOrDsToDateStrConvert => {
28654 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
28655 if let Expression::Function(f) = e {
28656 let arg = f.args.into_iter().next().unwrap();
28657 let str_type = match target {
28658 DialectType::DuckDB
28659 | DialectType::PostgreSQL
28660 | DialectType::Materialize => DataType::Text,
28661 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28662 DataType::Custom {
28663 name: "STRING".to_string(),
28664 }
28665 }
28666 DialectType::Presto
28667 | DialectType::Trino
28668 | DialectType::Athena
28669 | DialectType::Drill => DataType::VarChar {
28670 length: None,
28671 parenthesized_length: false,
28672 },
28673 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
28674 DataType::Custom {
28675 name: "STRING".to_string(),
28676 }
28677 }
28678 _ => DataType::VarChar {
28679 length: None,
28680 parenthesized_length: false,
28681 },
28682 };
28683 let cast_expr = Expression::Cast(Box::new(Cast {
28684 this: arg,
28685 to: str_type,
28686 double_colon_syntax: false,
28687 trailing_comments: Vec::new(),
28688 format: None,
28689 default: None,
28690 inferred_type: None,
28691 }));
28692 Ok(Expression::Substring(Box::new(
28693 crate::expressions::SubstringFunc {
28694 this: cast_expr,
28695 start: Expression::number(1),
28696 length: Some(Expression::number(10)),
28697 from_for_syntax: false,
28698 },
28699 )))
28700 } else {
28701 Ok(e)
28702 }
28703 }
28704
28705 Action::DateStrToDateConvert => {
28706 // DATE_STR_TO_DATE(x) -> dialect-specific
28707 if let Expression::Function(f) = e {
28708 let arg = f.args.into_iter().next().unwrap();
28709 match target {
28710 DialectType::SQLite => {
28711 // SQLite: just the bare expression (dates are strings)
28712 Ok(arg)
28713 }
28714 _ => Ok(Expression::Cast(Box::new(Cast {
28715 this: arg,
28716 to: DataType::Date,
28717 double_colon_syntax: false,
28718 trailing_comments: Vec::new(),
28719 format: None,
28720 default: None,
28721 inferred_type: None,
28722 }))),
28723 }
28724 } else {
28725 Ok(e)
28726 }
28727 }
28728
28729 Action::TimeStrToDateConvert => {
28730 // TIME_STR_TO_DATE(x) -> dialect-specific
28731 if let Expression::Function(f) = e {
28732 let arg = f.args.into_iter().next().unwrap();
28733 match target {
28734 DialectType::Hive
28735 | DialectType::Doris
28736 | DialectType::StarRocks
28737 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
28738 Function::new("TO_DATE".to_string(), vec![arg]),
28739 ))),
28740 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28741 // Presto: CAST(x AS TIMESTAMP)
28742 Ok(Expression::Cast(Box::new(Cast {
28743 this: arg,
28744 to: DataType::Timestamp {
28745 timezone: false,
28746 precision: None,
28747 },
28748 double_colon_syntax: false,
28749 trailing_comments: Vec::new(),
28750 format: None,
28751 default: None,
28752 inferred_type: None,
28753 })))
28754 }
28755 _ => {
28756 // Default: CAST(x AS DATE)
28757 Ok(Expression::Cast(Box::new(Cast {
28758 this: arg,
28759 to: DataType::Date,
28760 double_colon_syntax: false,
28761 trailing_comments: Vec::new(),
28762 format: None,
28763 default: None,
28764 inferred_type: None,
28765 })))
28766 }
28767 }
28768 } else {
28769 Ok(e)
28770 }
28771 }
28772
28773 Action::TimeStrToTimeConvert => {
28774 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
28775 if let Expression::Function(f) = e {
28776 let mut args = f.args;
28777 let this = args.remove(0);
28778 let zone = if !args.is_empty() {
28779 match &args[0] {
28780 Expression::Literal(lit)
28781 if matches!(lit.as_ref(), Literal::String(_)) =>
28782 {
28783 let Literal::String(s) = lit.as_ref() else {
28784 unreachable!()
28785 };
28786 Some(s.clone())
28787 }
28788 _ => None,
28789 }
28790 } else {
28791 None
28792 };
28793 let has_zone = zone.is_some();
28794
28795 match target {
28796 DialectType::SQLite => {
28797 // SQLite: just the bare expression
28798 Ok(this)
28799 }
28800 DialectType::MySQL => {
28801 if has_zone {
28802 // MySQL with zone: TIMESTAMP(x)
28803 Ok(Expression::Function(Box::new(Function::new(
28804 "TIMESTAMP".to_string(),
28805 vec![this],
28806 ))))
28807 } else {
28808 // MySQL: CAST(x AS DATETIME) or with precision
28809 // Use DataType::Custom to avoid MySQL's transform_cast converting
28810 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
28811 let precision = if let Expression::Literal(ref lit) = this {
28812 if let Literal::String(ref s) = lit.as_ref() {
28813 if let Some(dot_pos) = s.rfind('.') {
28814 let frac = &s[dot_pos + 1..];
28815 let digit_count = frac
28816 .chars()
28817 .take_while(|c| c.is_ascii_digit())
28818 .count();
28819 if digit_count > 0 {
28820 Some(digit_count)
28821 } else {
28822 None
28823 }
28824 } else {
28825 None
28826 }
28827 } else {
28828 None
28829 }
28830 } else {
28831 None
28832 };
28833 let type_name = match precision {
28834 Some(p) => format!("DATETIME({})", p),
28835 None => "DATETIME".to_string(),
28836 };
28837 Ok(Expression::Cast(Box::new(Cast {
28838 this,
28839 to: DataType::Custom { name: type_name },
28840 double_colon_syntax: false,
28841 trailing_comments: Vec::new(),
28842 format: None,
28843 default: None,
28844 inferred_type: None,
28845 })))
28846 }
28847 }
28848 DialectType::ClickHouse => {
28849 if has_zone {
28850 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
28851 // We need to strip the timezone offset from the literal if present
28852 let clean_this = if let Expression::Literal(ref lit) = this {
28853 if let Literal::String(ref s) = lit.as_ref() {
28854 // Strip timezone offset like "-08:00" or "+00:00"
28855 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
28856 if let Some(offset_pos) = re_offset {
28857 if offset_pos > 10 {
28858 // After the date part
28859 let trimmed = s[..offset_pos].to_string();
28860 Expression::Literal(Box::new(Literal::String(
28861 trimmed,
28862 )))
28863 } else {
28864 this.clone()
28865 }
28866 } else {
28867 this.clone()
28868 }
28869 } else {
28870 this.clone()
28871 }
28872 } else {
28873 this.clone()
28874 };
28875 let zone_str = zone.unwrap();
28876 // Build: CAST(x AS DateTime64(6, 'zone'))
28877 let type_name = format!("DateTime64(6, '{}')", zone_str);
28878 Ok(Expression::Cast(Box::new(Cast {
28879 this: clean_this,
28880 to: DataType::Custom { name: type_name },
28881 double_colon_syntax: false,
28882 trailing_comments: Vec::new(),
28883 format: None,
28884 default: None,
28885 inferred_type: None,
28886 })))
28887 } else {
28888 Ok(Expression::Cast(Box::new(Cast {
28889 this,
28890 to: DataType::Custom {
28891 name: "DateTime64(6)".to_string(),
28892 },
28893 double_colon_syntax: false,
28894 trailing_comments: Vec::new(),
28895 format: None,
28896 default: None,
28897 inferred_type: None,
28898 })))
28899 }
28900 }
28901 DialectType::BigQuery => {
28902 if has_zone {
28903 // BigQuery with zone: CAST(x AS TIMESTAMP)
28904 Ok(Expression::Cast(Box::new(Cast {
28905 this,
28906 to: DataType::Timestamp {
28907 timezone: false,
28908 precision: None,
28909 },
28910 double_colon_syntax: false,
28911 trailing_comments: Vec::new(),
28912 format: None,
28913 default: None,
28914 inferred_type: None,
28915 })))
28916 } else {
28917 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
28918 Ok(Expression::Cast(Box::new(Cast {
28919 this,
28920 to: DataType::Custom {
28921 name: "DATETIME".to_string(),
28922 },
28923 double_colon_syntax: false,
28924 trailing_comments: Vec::new(),
28925 format: None,
28926 default: None,
28927 inferred_type: None,
28928 })))
28929 }
28930 }
28931 DialectType::Doris => {
28932 // Doris: CAST(x AS DATETIME)
28933 Ok(Expression::Cast(Box::new(Cast {
28934 this,
28935 to: DataType::Custom {
28936 name: "DATETIME".to_string(),
28937 },
28938 double_colon_syntax: false,
28939 trailing_comments: Vec::new(),
28940 format: None,
28941 default: None,
28942 inferred_type: None,
28943 })))
28944 }
28945 DialectType::TSQL | DialectType::Fabric => {
28946 if has_zone {
28947 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
28948 let cast_expr = Expression::Cast(Box::new(Cast {
28949 this,
28950 to: DataType::Custom {
28951 name: "DATETIMEOFFSET".to_string(),
28952 },
28953 double_colon_syntax: false,
28954 trailing_comments: Vec::new(),
28955 format: None,
28956 default: None,
28957 inferred_type: None,
28958 }));
28959 Ok(Expression::AtTimeZone(Box::new(
28960 crate::expressions::AtTimeZone {
28961 this: cast_expr,
28962 zone: Expression::Literal(Box::new(Literal::String(
28963 "UTC".to_string(),
28964 ))),
28965 },
28966 )))
28967 } else {
28968 // TSQL: CAST(x AS DATETIME2)
28969 Ok(Expression::Cast(Box::new(Cast {
28970 this,
28971 to: DataType::Custom {
28972 name: "DATETIME2".to_string(),
28973 },
28974 double_colon_syntax: false,
28975 trailing_comments: Vec::new(),
28976 format: None,
28977 default: None,
28978 inferred_type: None,
28979 })))
28980 }
28981 }
28982 DialectType::DuckDB => {
28983 if has_zone {
28984 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
28985 Ok(Expression::Cast(Box::new(Cast {
28986 this,
28987 to: DataType::Timestamp {
28988 timezone: true,
28989 precision: None,
28990 },
28991 double_colon_syntax: false,
28992 trailing_comments: Vec::new(),
28993 format: None,
28994 default: None,
28995 inferred_type: None,
28996 })))
28997 } else {
28998 // DuckDB: CAST(x AS TIMESTAMP)
28999 Ok(Expression::Cast(Box::new(Cast {
29000 this,
29001 to: DataType::Timestamp {
29002 timezone: false,
29003 precision: None,
29004 },
29005 double_colon_syntax: false,
29006 trailing_comments: Vec::new(),
29007 format: None,
29008 default: None,
29009 inferred_type: None,
29010 })))
29011 }
29012 }
29013 DialectType::PostgreSQL
29014 | DialectType::Materialize
29015 | DialectType::RisingWave => {
29016 if has_zone {
29017 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
29018 Ok(Expression::Cast(Box::new(Cast {
29019 this,
29020 to: DataType::Timestamp {
29021 timezone: true,
29022 precision: None,
29023 },
29024 double_colon_syntax: false,
29025 trailing_comments: Vec::new(),
29026 format: None,
29027 default: None,
29028 inferred_type: None,
29029 })))
29030 } else {
29031 // PostgreSQL: CAST(x AS TIMESTAMP)
29032 Ok(Expression::Cast(Box::new(Cast {
29033 this,
29034 to: DataType::Timestamp {
29035 timezone: false,
29036 precision: None,
29037 },
29038 double_colon_syntax: false,
29039 trailing_comments: Vec::new(),
29040 format: None,
29041 default: None,
29042 inferred_type: None,
29043 })))
29044 }
29045 }
29046 DialectType::Snowflake => {
29047 if has_zone {
29048 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
29049 Ok(Expression::Cast(Box::new(Cast {
29050 this,
29051 to: DataType::Timestamp {
29052 timezone: true,
29053 precision: None,
29054 },
29055 double_colon_syntax: false,
29056 trailing_comments: Vec::new(),
29057 format: None,
29058 default: None,
29059 inferred_type: None,
29060 })))
29061 } else {
29062 // Snowflake: CAST(x AS TIMESTAMP)
29063 Ok(Expression::Cast(Box::new(Cast {
29064 this,
29065 to: DataType::Timestamp {
29066 timezone: false,
29067 precision: None,
29068 },
29069 double_colon_syntax: false,
29070 trailing_comments: Vec::new(),
29071 format: None,
29072 default: None,
29073 inferred_type: None,
29074 })))
29075 }
29076 }
29077 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29078 if has_zone {
29079 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
29080 // Check for precision from sub-second digits
29081 let precision = if let Expression::Literal(ref lit) = this {
29082 if let Literal::String(ref s) = lit.as_ref() {
29083 if let Some(dot_pos) = s.rfind('.') {
29084 let frac = &s[dot_pos + 1..];
29085 let digit_count = frac
29086 .chars()
29087 .take_while(|c| c.is_ascii_digit())
29088 .count();
29089 if digit_count > 0
29090 && matches!(target, DialectType::Trino)
29091 {
29092 Some(digit_count as u32)
29093 } else {
29094 None
29095 }
29096 } else {
29097 None
29098 }
29099 } else {
29100 None
29101 }
29102 } else {
29103 None
29104 };
29105 let dt = if let Some(prec) = precision {
29106 DataType::Timestamp {
29107 timezone: true,
29108 precision: Some(prec),
29109 }
29110 } else {
29111 DataType::Timestamp {
29112 timezone: true,
29113 precision: None,
29114 }
29115 };
29116 Ok(Expression::Cast(Box::new(Cast {
29117 this,
29118 to: dt,
29119 double_colon_syntax: false,
29120 trailing_comments: Vec::new(),
29121 format: None,
29122 default: None,
29123 inferred_type: None,
29124 })))
29125 } else {
29126 // Check for sub-second precision for Trino
29127 let precision = if let Expression::Literal(ref lit) = this {
29128 if let Literal::String(ref s) = lit.as_ref() {
29129 if let Some(dot_pos) = s.rfind('.') {
29130 let frac = &s[dot_pos + 1..];
29131 let digit_count = frac
29132 .chars()
29133 .take_while(|c| c.is_ascii_digit())
29134 .count();
29135 if digit_count > 0
29136 && matches!(target, DialectType::Trino)
29137 {
29138 Some(digit_count as u32)
29139 } else {
29140 None
29141 }
29142 } else {
29143 None
29144 }
29145 } else {
29146 None
29147 }
29148 } else {
29149 None
29150 };
29151 let dt = DataType::Timestamp {
29152 timezone: false,
29153 precision,
29154 };
29155 Ok(Expression::Cast(Box::new(Cast {
29156 this,
29157 to: dt,
29158 double_colon_syntax: false,
29159 trailing_comments: Vec::new(),
29160 format: None,
29161 default: None,
29162 inferred_type: None,
29163 })))
29164 }
29165 }
29166 DialectType::Redshift => {
29167 if has_zone {
29168 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
29169 Ok(Expression::Cast(Box::new(Cast {
29170 this,
29171 to: DataType::Timestamp {
29172 timezone: true,
29173 precision: None,
29174 },
29175 double_colon_syntax: false,
29176 trailing_comments: Vec::new(),
29177 format: None,
29178 default: None,
29179 inferred_type: None,
29180 })))
29181 } else {
29182 // Redshift: CAST(x AS TIMESTAMP)
29183 Ok(Expression::Cast(Box::new(Cast {
29184 this,
29185 to: DataType::Timestamp {
29186 timezone: false,
29187 precision: None,
29188 },
29189 double_colon_syntax: false,
29190 trailing_comments: Vec::new(),
29191 format: None,
29192 default: None,
29193 inferred_type: None,
29194 })))
29195 }
29196 }
29197 _ => {
29198 // Default: CAST(x AS TIMESTAMP)
29199 Ok(Expression::Cast(Box::new(Cast {
29200 this,
29201 to: DataType::Timestamp {
29202 timezone: false,
29203 precision: None,
29204 },
29205 double_colon_syntax: false,
29206 trailing_comments: Vec::new(),
29207 format: None,
29208 default: None,
29209 inferred_type: None,
29210 })))
29211 }
29212 }
29213 } else {
29214 Ok(e)
29215 }
29216 }
29217
29218 Action::DateToDateStrConvert => {
29219 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
29220 if let Expression::Function(f) = e {
29221 let arg = f.args.into_iter().next().unwrap();
29222 let str_type = match target {
29223 DialectType::DuckDB => DataType::Text,
29224 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29225 DataType::Custom {
29226 name: "STRING".to_string(),
29227 }
29228 }
29229 DialectType::Presto
29230 | DialectType::Trino
29231 | DialectType::Athena
29232 | DialectType::Drill => DataType::VarChar {
29233 length: None,
29234 parenthesized_length: false,
29235 },
29236 _ => DataType::VarChar {
29237 length: None,
29238 parenthesized_length: false,
29239 },
29240 };
29241 Ok(Expression::Cast(Box::new(Cast {
29242 this: arg,
29243 to: str_type,
29244 double_colon_syntax: false,
29245 trailing_comments: Vec::new(),
29246 format: None,
29247 default: None,
29248 inferred_type: None,
29249 })))
29250 } else {
29251 Ok(e)
29252 }
29253 }
29254
29255 Action::DateToDiConvert => {
29256 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
29257 if let Expression::Function(f) = e {
29258 let arg = f.args.into_iter().next().unwrap();
29259 let inner = match target {
29260 DialectType::DuckDB => {
29261 // STRFTIME(x, '%Y%m%d')
29262 Expression::Function(Box::new(Function::new(
29263 "STRFTIME".to_string(),
29264 vec![arg, Expression::string("%Y%m%d")],
29265 )))
29266 }
29267 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29268 // DATE_FORMAT(x, 'yyyyMMdd')
29269 Expression::Function(Box::new(Function::new(
29270 "DATE_FORMAT".to_string(),
29271 vec![arg, Expression::string("yyyyMMdd")],
29272 )))
29273 }
29274 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29275 // DATE_FORMAT(x, '%Y%m%d')
29276 Expression::Function(Box::new(Function::new(
29277 "DATE_FORMAT".to_string(),
29278 vec![arg, Expression::string("%Y%m%d")],
29279 )))
29280 }
29281 DialectType::Drill => {
29282 // TO_DATE(x, 'yyyyMMdd')
29283 Expression::Function(Box::new(Function::new(
29284 "TO_DATE".to_string(),
29285 vec![arg, Expression::string("yyyyMMdd")],
29286 )))
29287 }
29288 _ => {
29289 // Default: STRFTIME(x, '%Y%m%d')
29290 Expression::Function(Box::new(Function::new(
29291 "STRFTIME".to_string(),
29292 vec![arg, Expression::string("%Y%m%d")],
29293 )))
29294 }
29295 };
29296 // Use INT (not INTEGER) for Presto/Trino
29297 let int_type = match target {
29298 DialectType::Presto
29299 | DialectType::Trino
29300 | DialectType::Athena
29301 | DialectType::TSQL
29302 | DialectType::Fabric
29303 | DialectType::SQLite
29304 | DialectType::Redshift => DataType::Custom {
29305 name: "INT".to_string(),
29306 },
29307 _ => DataType::Int {
29308 length: None,
29309 integer_spelling: false,
29310 },
29311 };
29312 Ok(Expression::Cast(Box::new(Cast {
29313 this: inner,
29314 to: int_type,
29315 double_colon_syntax: false,
29316 trailing_comments: Vec::new(),
29317 format: None,
29318 default: None,
29319 inferred_type: None,
29320 })))
29321 } else {
29322 Ok(e)
29323 }
29324 }
29325
29326 Action::DiToDateConvert => {
29327 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
29328 if let Expression::Function(f) = e {
29329 let arg = f.args.into_iter().next().unwrap();
29330 match target {
29331 DialectType::DuckDB => {
29332 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
29333 let cast_text = Expression::Cast(Box::new(Cast {
29334 this: arg,
29335 to: DataType::Text,
29336 double_colon_syntax: false,
29337 trailing_comments: Vec::new(),
29338 format: None,
29339 default: None,
29340 inferred_type: None,
29341 }));
29342 let strptime = Expression::Function(Box::new(Function::new(
29343 "STRPTIME".to_string(),
29344 vec![cast_text, Expression::string("%Y%m%d")],
29345 )));
29346 Ok(Expression::Cast(Box::new(Cast {
29347 this: strptime,
29348 to: DataType::Date,
29349 double_colon_syntax: false,
29350 trailing_comments: Vec::new(),
29351 format: None,
29352 default: None,
29353 inferred_type: None,
29354 })))
29355 }
29356 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29357 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
29358 let cast_str = Expression::Cast(Box::new(Cast {
29359 this: arg,
29360 to: DataType::Custom {
29361 name: "STRING".to_string(),
29362 },
29363 double_colon_syntax: false,
29364 trailing_comments: Vec::new(),
29365 format: None,
29366 default: None,
29367 inferred_type: None,
29368 }));
29369 Ok(Expression::Function(Box::new(Function::new(
29370 "TO_DATE".to_string(),
29371 vec![cast_str, Expression::string("yyyyMMdd")],
29372 ))))
29373 }
29374 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29375 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
29376 let cast_varchar = Expression::Cast(Box::new(Cast {
29377 this: arg,
29378 to: DataType::VarChar {
29379 length: None,
29380 parenthesized_length: false,
29381 },
29382 double_colon_syntax: false,
29383 trailing_comments: Vec::new(),
29384 format: None,
29385 default: None,
29386 inferred_type: None,
29387 }));
29388 let date_parse = Expression::Function(Box::new(Function::new(
29389 "DATE_PARSE".to_string(),
29390 vec![cast_varchar, Expression::string("%Y%m%d")],
29391 )));
29392 Ok(Expression::Cast(Box::new(Cast {
29393 this: date_parse,
29394 to: DataType::Date,
29395 double_colon_syntax: false,
29396 trailing_comments: Vec::new(),
29397 format: None,
29398 default: None,
29399 inferred_type: None,
29400 })))
29401 }
29402 DialectType::Drill => {
29403 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
29404 let cast_varchar = Expression::Cast(Box::new(Cast {
29405 this: arg,
29406 to: DataType::VarChar {
29407 length: None,
29408 parenthesized_length: false,
29409 },
29410 double_colon_syntax: false,
29411 trailing_comments: Vec::new(),
29412 format: None,
29413 default: None,
29414 inferred_type: None,
29415 }));
29416 Ok(Expression::Function(Box::new(Function::new(
29417 "TO_DATE".to_string(),
29418 vec![cast_varchar, Expression::string("yyyyMMdd")],
29419 ))))
29420 }
29421 _ => Ok(Expression::Function(Box::new(Function::new(
29422 "DI_TO_DATE".to_string(),
29423 vec![arg],
29424 )))),
29425 }
29426 } else {
29427 Ok(e)
29428 }
29429 }
29430
29431 Action::TsOrDiToDiConvert => {
29432 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
29433 if let Expression::Function(f) = e {
29434 let arg = f.args.into_iter().next().unwrap();
29435 let str_type = match target {
29436 DialectType::DuckDB => DataType::Text,
29437 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29438 DataType::Custom {
29439 name: "STRING".to_string(),
29440 }
29441 }
29442 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29443 DataType::VarChar {
29444 length: None,
29445 parenthesized_length: false,
29446 }
29447 }
29448 _ => DataType::VarChar {
29449 length: None,
29450 parenthesized_length: false,
29451 },
29452 };
29453 let cast_str = Expression::Cast(Box::new(Cast {
29454 this: arg,
29455 to: str_type,
29456 double_colon_syntax: false,
29457 trailing_comments: Vec::new(),
29458 format: None,
29459 default: None,
29460 inferred_type: None,
29461 }));
29462 let replace_expr = Expression::Function(Box::new(Function::new(
29463 "REPLACE".to_string(),
29464 vec![cast_str, Expression::string("-"), Expression::string("")],
29465 )));
29466 let substr_name = match target {
29467 DialectType::DuckDB
29468 | DialectType::Hive
29469 | DialectType::Spark
29470 | DialectType::Databricks => "SUBSTR",
29471 _ => "SUBSTR",
29472 };
29473 let substr = Expression::Function(Box::new(Function::new(
29474 substr_name.to_string(),
29475 vec![replace_expr, Expression::number(1), Expression::number(8)],
29476 )));
29477 // Use INT (not INTEGER) for Presto/Trino etc.
29478 let int_type = match target {
29479 DialectType::Presto
29480 | DialectType::Trino
29481 | DialectType::Athena
29482 | DialectType::TSQL
29483 | DialectType::Fabric
29484 | DialectType::SQLite
29485 | DialectType::Redshift => DataType::Custom {
29486 name: "INT".to_string(),
29487 },
29488 _ => DataType::Int {
29489 length: None,
29490 integer_spelling: false,
29491 },
29492 };
29493 Ok(Expression::Cast(Box::new(Cast {
29494 this: substr,
29495 to: int_type,
29496 double_colon_syntax: false,
29497 trailing_comments: Vec::new(),
29498 format: None,
29499 default: None,
29500 inferred_type: None,
29501 })))
29502 } else {
29503 Ok(e)
29504 }
29505 }
29506
29507 Action::UnixToStrConvert => {
29508 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
29509 if let Expression::Function(f) = e {
29510 let mut args = f.args;
29511 let this = args.remove(0);
29512 let fmt_expr = if !args.is_empty() {
29513 Some(args.remove(0))
29514 } else {
29515 None
29516 };
29517
29518 // Check if format is a string literal
29519 let fmt_str = fmt_expr.as_ref().and_then(|f| {
29520 if let Expression::Literal(lit) = f {
29521 if let Literal::String(s) = lit.as_ref() {
29522 Some(s.clone())
29523 } else {
29524 None
29525 }
29526 } else {
29527 None
29528 }
29529 });
29530
29531 if let Some(fmt_string) = fmt_str {
29532 // String literal format -> use UnixToStr expression (generator handles it)
29533 Ok(Expression::UnixToStr(Box::new(
29534 crate::expressions::UnixToStr {
29535 this: Box::new(this),
29536 format: Some(fmt_string),
29537 },
29538 )))
29539 } else if let Some(fmt_e) = fmt_expr {
29540 // Non-literal format (e.g., identifier `y`) -> build target expression directly
29541 match target {
29542 DialectType::DuckDB => {
29543 // STRFTIME(TO_TIMESTAMP(x), y)
29544 let to_ts = Expression::Function(Box::new(Function::new(
29545 "TO_TIMESTAMP".to_string(),
29546 vec![this],
29547 )));
29548 Ok(Expression::Function(Box::new(Function::new(
29549 "STRFTIME".to_string(),
29550 vec![to_ts, fmt_e],
29551 ))))
29552 }
29553 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29554 // DATE_FORMAT(FROM_UNIXTIME(x), y)
29555 let from_unix = Expression::Function(Box::new(Function::new(
29556 "FROM_UNIXTIME".to_string(),
29557 vec![this],
29558 )));
29559 Ok(Expression::Function(Box::new(Function::new(
29560 "DATE_FORMAT".to_string(),
29561 vec![from_unix, fmt_e],
29562 ))))
29563 }
29564 DialectType::Hive
29565 | DialectType::Spark
29566 | DialectType::Databricks
29567 | DialectType::Doris
29568 | DialectType::StarRocks => {
29569 // FROM_UNIXTIME(x, y)
29570 Ok(Expression::Function(Box::new(Function::new(
29571 "FROM_UNIXTIME".to_string(),
29572 vec![this, fmt_e],
29573 ))))
29574 }
29575 _ => {
29576 // Default: keep as UNIX_TO_STR(x, y)
29577 Ok(Expression::Function(Box::new(Function::new(
29578 "UNIX_TO_STR".to_string(),
29579 vec![this, fmt_e],
29580 ))))
29581 }
29582 }
29583 } else {
29584 Ok(Expression::UnixToStr(Box::new(
29585 crate::expressions::UnixToStr {
29586 this: Box::new(this),
29587 format: None,
29588 },
29589 )))
29590 }
29591 } else {
29592 Ok(e)
29593 }
29594 }
29595
29596 Action::UnixToTimeConvert => {
29597 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
29598 if let Expression::Function(f) = e {
29599 let arg = f.args.into_iter().next().unwrap();
29600 Ok(Expression::UnixToTime(Box::new(
29601 crate::expressions::UnixToTime {
29602 this: Box::new(arg),
29603 scale: None,
29604 zone: None,
29605 hours: None,
29606 minutes: None,
29607 format: None,
29608 target_type: None,
29609 },
29610 )))
29611 } else {
29612 Ok(e)
29613 }
29614 }
29615
29616 Action::UnixToTimeStrConvert => {
29617 // UNIX_TO_TIME_STR(x) -> dialect-specific
29618 if let Expression::Function(f) = e {
29619 let arg = f.args.into_iter().next().unwrap();
29620 match target {
29621 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29622 // FROM_UNIXTIME(x)
29623 Ok(Expression::Function(Box::new(Function::new(
29624 "FROM_UNIXTIME".to_string(),
29625 vec![arg],
29626 ))))
29627 }
29628 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29629 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
29630 let from_unix = Expression::Function(Box::new(Function::new(
29631 "FROM_UNIXTIME".to_string(),
29632 vec![arg],
29633 )));
29634 Ok(Expression::Cast(Box::new(Cast {
29635 this: from_unix,
29636 to: DataType::VarChar {
29637 length: None,
29638 parenthesized_length: false,
29639 },
29640 double_colon_syntax: false,
29641 trailing_comments: Vec::new(),
29642 format: None,
29643 default: None,
29644 inferred_type: None,
29645 })))
29646 }
29647 DialectType::DuckDB => {
29648 // CAST(TO_TIMESTAMP(x) AS TEXT)
29649 let to_ts = Expression::Function(Box::new(Function::new(
29650 "TO_TIMESTAMP".to_string(),
29651 vec![arg],
29652 )));
29653 Ok(Expression::Cast(Box::new(Cast {
29654 this: to_ts,
29655 to: DataType::Text,
29656 double_colon_syntax: false,
29657 trailing_comments: Vec::new(),
29658 format: None,
29659 default: None,
29660 inferred_type: None,
29661 })))
29662 }
29663 _ => Ok(Expression::Function(Box::new(Function::new(
29664 "UNIX_TO_TIME_STR".to_string(),
29665 vec![arg],
29666 )))),
29667 }
29668 } else {
29669 Ok(e)
29670 }
29671 }
29672
29673 Action::TimeToUnixConvert => {
29674 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
29675 if let Expression::Function(f) = e {
29676 let arg = f.args.into_iter().next().unwrap();
29677 Ok(Expression::TimeToUnix(Box::new(
29678 crate::expressions::UnaryFunc {
29679 this: arg,
29680 original_name: None,
29681 inferred_type: None,
29682 },
29683 )))
29684 } else {
29685 Ok(e)
29686 }
29687 }
29688
29689 Action::TimeToStrConvert => {
29690 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
29691 if let Expression::Function(f) = e {
29692 let mut args = f.args;
29693 let this = args.remove(0);
29694 let fmt = match args.remove(0) {
29695 Expression::Literal(lit)
29696 if matches!(lit.as_ref(), Literal::String(_)) =>
29697 {
29698 let Literal::String(s) = lit.as_ref() else {
29699 unreachable!()
29700 };
29701 s.clone()
29702 }
29703 other => {
29704 return Ok(Expression::Function(Box::new(Function::new(
29705 "TIME_TO_STR".to_string(),
29706 vec![this, other],
29707 ))));
29708 }
29709 };
29710 Ok(Expression::TimeToStr(Box::new(
29711 crate::expressions::TimeToStr {
29712 this: Box::new(this),
29713 format: fmt,
29714 culture: None,
29715 zone: None,
29716 },
29717 )))
29718 } else {
29719 Ok(e)
29720 }
29721 }
29722
29723 Action::StrToUnixConvert => {
29724 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
29725 if let Expression::Function(f) = e {
29726 let mut args = f.args;
29727 let this = args.remove(0);
29728 let fmt = match args.remove(0) {
29729 Expression::Literal(lit)
29730 if matches!(lit.as_ref(), Literal::String(_)) =>
29731 {
29732 let Literal::String(s) = lit.as_ref() else {
29733 unreachable!()
29734 };
29735 s.clone()
29736 }
29737 other => {
29738 return Ok(Expression::Function(Box::new(Function::new(
29739 "STR_TO_UNIX".to_string(),
29740 vec![this, other],
29741 ))));
29742 }
29743 };
29744 Ok(Expression::StrToUnix(Box::new(
29745 crate::expressions::StrToUnix {
29746 this: Some(Box::new(this)),
29747 format: Some(fmt),
29748 },
29749 )))
29750 } else {
29751 Ok(e)
29752 }
29753 }
29754
29755 Action::TimeStrToUnixConvert => {
29756 // TIME_STR_TO_UNIX(x) -> dialect-specific
29757 if let Expression::Function(f) = e {
29758 let arg = f.args.into_iter().next().unwrap();
29759 match target {
29760 DialectType::DuckDB => {
29761 // EPOCH(CAST(x AS TIMESTAMP))
29762 let cast_ts = Expression::Cast(Box::new(Cast {
29763 this: arg,
29764 to: DataType::Timestamp {
29765 timezone: false,
29766 precision: None,
29767 },
29768 double_colon_syntax: false,
29769 trailing_comments: Vec::new(),
29770 format: None,
29771 default: None,
29772 inferred_type: None,
29773 }));
29774 Ok(Expression::Function(Box::new(Function::new(
29775 "EPOCH".to_string(),
29776 vec![cast_ts],
29777 ))))
29778 }
29779 DialectType::Hive
29780 | DialectType::Doris
29781 | DialectType::StarRocks
29782 | DialectType::MySQL => {
29783 // UNIX_TIMESTAMP(x)
29784 Ok(Expression::Function(Box::new(Function::new(
29785 "UNIX_TIMESTAMP".to_string(),
29786 vec![arg],
29787 ))))
29788 }
29789 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29790 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
29791 let date_parse = Expression::Function(Box::new(Function::new(
29792 "DATE_PARSE".to_string(),
29793 vec![arg, Expression::string("%Y-%m-%d %T")],
29794 )));
29795 Ok(Expression::Function(Box::new(Function::new(
29796 "TO_UNIXTIME".to_string(),
29797 vec![date_parse],
29798 ))))
29799 }
29800 _ => Ok(Expression::Function(Box::new(Function::new(
29801 "TIME_STR_TO_UNIX".to_string(),
29802 vec![arg],
29803 )))),
29804 }
29805 } else {
29806 Ok(e)
29807 }
29808 }
29809
29810 Action::TimeToTimeStrConvert => {
29811 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
29812 if let Expression::Function(f) = e {
29813 let arg = f.args.into_iter().next().unwrap();
29814 let str_type = match target {
29815 DialectType::DuckDB => DataType::Text,
29816 DialectType::Hive
29817 | DialectType::Spark
29818 | DialectType::Databricks
29819 | DialectType::Doris
29820 | DialectType::StarRocks => DataType::Custom {
29821 name: "STRING".to_string(),
29822 },
29823 DialectType::Redshift => DataType::Custom {
29824 name: "VARCHAR(MAX)".to_string(),
29825 },
29826 _ => DataType::VarChar {
29827 length: None,
29828 parenthesized_length: false,
29829 },
29830 };
29831 Ok(Expression::Cast(Box::new(Cast {
29832 this: arg,
29833 to: str_type,
29834 double_colon_syntax: false,
29835 trailing_comments: Vec::new(),
29836 format: None,
29837 default: None,
29838 inferred_type: None,
29839 })))
29840 } else {
29841 Ok(e)
29842 }
29843 }
29844
29845 Action::DateTruncSwapArgs => {
29846 // DATE_TRUNC('unit', x) from Generic -> target-specific
29847 if let Expression::Function(f) = e {
29848 if f.args.len() == 2 {
29849 let unit_arg = f.args[0].clone();
29850 let expr_arg = f.args[1].clone();
29851 // Extract unit string from the first arg
29852 let unit_str = match &unit_arg {
29853 Expression::Literal(lit)
29854 if matches!(lit.as_ref(), Literal::String(_)) =>
29855 {
29856 let Literal::String(s) = lit.as_ref() else {
29857 unreachable!()
29858 };
29859 s.to_ascii_uppercase()
29860 }
29861 _ => return Ok(Expression::Function(f)),
29862 };
29863 match target {
29864 DialectType::BigQuery => {
29865 // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
29866 let unit_ident =
29867 Expression::Column(Box::new(crate::expressions::Column {
29868 name: crate::expressions::Identifier::new(unit_str),
29869 table: None,
29870 join_mark: false,
29871 trailing_comments: Vec::new(),
29872 span: None,
29873 inferred_type: None,
29874 }));
29875 Ok(Expression::Function(Box::new(Function::new(
29876 "DATE_TRUNC".to_string(),
29877 vec![expr_arg, unit_ident],
29878 ))))
29879 }
29880 DialectType::Doris => {
29881 // Doris: DATE_TRUNC(x, 'UNIT')
29882 Ok(Expression::Function(Box::new(Function::new(
29883 "DATE_TRUNC".to_string(),
29884 vec![expr_arg, Expression::string(&unit_str)],
29885 ))))
29886 }
29887 DialectType::StarRocks => {
29888 // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
29889 Ok(Expression::Function(Box::new(Function::new(
29890 "DATE_TRUNC".to_string(),
29891 vec![Expression::string(&unit_str), expr_arg],
29892 ))))
29893 }
29894 DialectType::Spark | DialectType::Databricks => {
29895 // Spark: TRUNC(x, 'UNIT')
29896 Ok(Expression::Function(Box::new(Function::new(
29897 "TRUNC".to_string(),
29898 vec![expr_arg, Expression::string(&unit_str)],
29899 ))))
29900 }
29901 DialectType::MySQL => {
29902 // MySQL: complex expansion based on unit
29903 Self::date_trunc_to_mysql(&unit_str, &expr_arg)
29904 }
29905 _ => Ok(Expression::Function(f)),
29906 }
29907 } else {
29908 Ok(Expression::Function(f))
29909 }
29910 } else {
29911 Ok(e)
29912 }
29913 }
29914
29915 Action::TimestampTruncConvert => {
29916 // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
29917 if let Expression::Function(f) = e {
29918 if f.args.len() >= 2 {
29919 let expr_arg = f.args[0].clone();
29920 let unit_arg = f.args[1].clone();
29921 let tz_arg = if f.args.len() >= 3 {
29922 Some(f.args[2].clone())
29923 } else {
29924 None
29925 };
29926 // Extract unit string
29927 let unit_str = match &unit_arg {
29928 Expression::Literal(lit)
29929 if matches!(lit.as_ref(), Literal::String(_)) =>
29930 {
29931 let Literal::String(s) = lit.as_ref() else {
29932 unreachable!()
29933 };
29934 s.to_ascii_uppercase()
29935 }
29936 Expression::Column(c) => c.name.name.to_ascii_uppercase(),
29937 _ => {
29938 return Ok(Expression::Function(f));
29939 }
29940 };
29941 match target {
29942 DialectType::Spark | DialectType::Databricks => {
29943 // Spark: DATE_TRUNC('UNIT', x)
29944 Ok(Expression::Function(Box::new(Function::new(
29945 "DATE_TRUNC".to_string(),
29946 vec![Expression::string(&unit_str), expr_arg],
29947 ))))
29948 }
29949 DialectType::Doris | DialectType::StarRocks => {
29950 // Doris: DATE_TRUNC(x, 'UNIT')
29951 Ok(Expression::Function(Box::new(Function::new(
29952 "DATE_TRUNC".to_string(),
29953 vec![expr_arg, Expression::string(&unit_str)],
29954 ))))
29955 }
29956 DialectType::BigQuery => {
29957 // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
29958 let unit_ident =
29959 Expression::Column(Box::new(crate::expressions::Column {
29960 name: crate::expressions::Identifier::new(unit_str),
29961 table: None,
29962 join_mark: false,
29963 trailing_comments: Vec::new(),
29964 span: None,
29965 inferred_type: None,
29966 }));
29967 let mut args = vec![expr_arg, unit_ident];
29968 if let Some(tz) = tz_arg {
29969 args.push(tz);
29970 }
29971 Ok(Expression::Function(Box::new(Function::new(
29972 "TIMESTAMP_TRUNC".to_string(),
29973 args,
29974 ))))
29975 }
29976 DialectType::DuckDB => {
29977 // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
29978 if let Some(tz) = tz_arg {
29979 let tz_str = match &tz {
29980 Expression::Literal(lit)
29981 if matches!(lit.as_ref(), Literal::String(_)) =>
29982 {
29983 let Literal::String(s) = lit.as_ref() else {
29984 unreachable!()
29985 };
29986 s.clone()
29987 }
29988 _ => "UTC".to_string(),
29989 };
29990 // x AT TIME ZONE 'tz'
29991 let at_tz = Expression::AtTimeZone(Box::new(
29992 crate::expressions::AtTimeZone {
29993 this: expr_arg,
29994 zone: Expression::string(&tz_str),
29995 },
29996 ));
29997 // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
29998 let trunc = Expression::Function(Box::new(Function::new(
29999 "DATE_TRUNC".to_string(),
30000 vec![Expression::string(&unit_str), at_tz],
30001 )));
30002 // DATE_TRUNC(...) AT TIME ZONE 'tz'
30003 Ok(Expression::AtTimeZone(Box::new(
30004 crate::expressions::AtTimeZone {
30005 this: trunc,
30006 zone: Expression::string(&tz_str),
30007 },
30008 )))
30009 } else {
30010 Ok(Expression::Function(Box::new(Function::new(
30011 "DATE_TRUNC".to_string(),
30012 vec![Expression::string(&unit_str), expr_arg],
30013 ))))
30014 }
30015 }
30016 DialectType::Presto
30017 | DialectType::Trino
30018 | DialectType::Athena
30019 | DialectType::Snowflake => {
30020 // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
30021 Ok(Expression::Function(Box::new(Function::new(
30022 "DATE_TRUNC".to_string(),
30023 vec![Expression::string(&unit_str), expr_arg],
30024 ))))
30025 }
30026 _ => {
30027 // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
30028 let mut args = vec![Expression::string(&unit_str), expr_arg];
30029 if let Some(tz) = tz_arg {
30030 args.push(tz);
30031 }
30032 Ok(Expression::Function(Box::new(Function::new(
30033 "DATE_TRUNC".to_string(),
30034 args,
30035 ))))
30036 }
30037 }
30038 } else {
30039 Ok(Expression::Function(f))
30040 }
30041 } else {
30042 Ok(e)
30043 }
30044 }
30045
30046 Action::StrToDateConvert => {
30047 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
30048 if let Expression::Function(f) = e {
30049 if f.args.len() == 2 {
30050 let mut args = f.args;
30051 let this = args.remove(0);
30052 let fmt_expr = args.remove(0);
30053 let fmt_str = match &fmt_expr {
30054 Expression::Literal(lit)
30055 if matches!(lit.as_ref(), Literal::String(_)) =>
30056 {
30057 let Literal::String(s) = lit.as_ref() else {
30058 unreachable!()
30059 };
30060 Some(s.clone())
30061 }
30062 _ => None,
30063 };
30064 let default_date = "%Y-%m-%d";
30065 let default_time = "%Y-%m-%d %H:%M:%S";
30066 let is_default = fmt_str
30067 .as_ref()
30068 .map_or(false, |f| f == default_date || f == default_time);
30069
30070 if is_default {
30071 // Default format: handle per-dialect
30072 match target {
30073 DialectType::MySQL
30074 | DialectType::Doris
30075 | DialectType::StarRocks => {
30076 // Keep STR_TO_DATE(x, fmt) as-is
30077 Ok(Expression::Function(Box::new(Function::new(
30078 "STR_TO_DATE".to_string(),
30079 vec![this, fmt_expr],
30080 ))))
30081 }
30082 DialectType::Hive => {
30083 // Hive: CAST(x AS DATE)
30084 Ok(Expression::Cast(Box::new(Cast {
30085 this,
30086 to: DataType::Date,
30087 double_colon_syntax: false,
30088 trailing_comments: Vec::new(),
30089 format: None,
30090 default: None,
30091 inferred_type: None,
30092 })))
30093 }
30094 DialectType::Presto
30095 | DialectType::Trino
30096 | DialectType::Athena => {
30097 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
30098 let date_parse =
30099 Expression::Function(Box::new(Function::new(
30100 "DATE_PARSE".to_string(),
30101 vec![this, fmt_expr],
30102 )));
30103 Ok(Expression::Cast(Box::new(Cast {
30104 this: date_parse,
30105 to: DataType::Date,
30106 double_colon_syntax: false,
30107 trailing_comments: Vec::new(),
30108 format: None,
30109 default: None,
30110 inferred_type: None,
30111 })))
30112 }
30113 _ => {
30114 // Others: TsOrDsToDate (delegates to generator)
30115 Ok(Expression::TsOrDsToDate(Box::new(
30116 crate::expressions::TsOrDsToDate {
30117 this: Box::new(this),
30118 format: None,
30119 safe: None,
30120 },
30121 )))
30122 }
30123 }
30124 } else if let Some(fmt) = fmt_str {
30125 match target {
30126 DialectType::Doris
30127 | DialectType::StarRocks
30128 | DialectType::MySQL => {
30129 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
30130 let mut normalized = fmt.clone();
30131 normalized = normalized.replace("%-d", "%e");
30132 normalized = normalized.replace("%-m", "%c");
30133 normalized = normalized.replace("%H:%M:%S", "%T");
30134 Ok(Expression::Function(Box::new(Function::new(
30135 "STR_TO_DATE".to_string(),
30136 vec![this, Expression::string(&normalized)],
30137 ))))
30138 }
30139 DialectType::Hive => {
30140 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
30141 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
30142 let unix_ts =
30143 Expression::Function(Box::new(Function::new(
30144 "UNIX_TIMESTAMP".to_string(),
30145 vec![this, Expression::string(&java_fmt)],
30146 )));
30147 let from_unix =
30148 Expression::Function(Box::new(Function::new(
30149 "FROM_UNIXTIME".to_string(),
30150 vec![unix_ts],
30151 )));
30152 Ok(Expression::Cast(Box::new(Cast {
30153 this: from_unix,
30154 to: DataType::Date,
30155 double_colon_syntax: false,
30156 trailing_comments: Vec::new(),
30157 format: None,
30158 default: None,
30159 inferred_type: None,
30160 })))
30161 }
30162 DialectType::Spark | DialectType::Databricks => {
30163 // Spark: TO_DATE(x, java_fmt)
30164 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
30165 Ok(Expression::Function(Box::new(Function::new(
30166 "TO_DATE".to_string(),
30167 vec![this, Expression::string(&java_fmt)],
30168 ))))
30169 }
30170 DialectType::Drill => {
30171 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
30172 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
30173 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
30174 let java_fmt = java_fmt.replace('T', "'T'");
30175 Ok(Expression::Function(Box::new(Function::new(
30176 "TO_DATE".to_string(),
30177 vec![this, Expression::string(&java_fmt)],
30178 ))))
30179 }
30180 _ => {
30181 // For other dialects: use TsOrDsToDate which delegates to generator
30182 Ok(Expression::TsOrDsToDate(Box::new(
30183 crate::expressions::TsOrDsToDate {
30184 this: Box::new(this),
30185 format: Some(fmt),
30186 safe: None,
30187 },
30188 )))
30189 }
30190 }
30191 } else {
30192 // Non-string format - keep as-is
30193 let mut new_args = Vec::new();
30194 new_args.push(this);
30195 new_args.push(fmt_expr);
30196 Ok(Expression::Function(Box::new(Function::new(
30197 "STR_TO_DATE".to_string(),
30198 new_args,
30199 ))))
30200 }
30201 } else {
30202 Ok(Expression::Function(f))
30203 }
30204 } else {
30205 Ok(e)
30206 }
30207 }
30208
30209 Action::TsOrDsAddConvert => {
30210 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
30211 if let Expression::Function(f) = e {
30212 if f.args.len() == 3 {
30213 let mut args = f.args;
30214 let x = args.remove(0);
30215 let n = args.remove(0);
30216 let unit_expr = args.remove(0);
30217 let unit_str = match &unit_expr {
30218 Expression::Literal(lit)
30219 if matches!(lit.as_ref(), Literal::String(_)) =>
30220 {
30221 let Literal::String(s) = lit.as_ref() else {
30222 unreachable!()
30223 };
30224 s.to_ascii_uppercase()
30225 }
30226 _ => "DAY".to_string(),
30227 };
30228
30229 match target {
30230 DialectType::Hive
30231 | DialectType::Spark
30232 | DialectType::Databricks => {
30233 // DATE_ADD(x, n) - only supports DAY unit
30234 Ok(Expression::Function(Box::new(Function::new(
30235 "DATE_ADD".to_string(),
30236 vec![x, n],
30237 ))))
30238 }
30239 DialectType::MySQL => {
30240 // DATE_ADD(x, INTERVAL n UNIT)
30241 let iu = match unit_str.as_str() {
30242 "YEAR" => crate::expressions::IntervalUnit::Year,
30243 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30244 "MONTH" => crate::expressions::IntervalUnit::Month,
30245 "WEEK" => crate::expressions::IntervalUnit::Week,
30246 "HOUR" => crate::expressions::IntervalUnit::Hour,
30247 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30248 "SECOND" => crate::expressions::IntervalUnit::Second,
30249 _ => crate::expressions::IntervalUnit::Day,
30250 };
30251 let interval = Expression::Interval(Box::new(
30252 crate::expressions::Interval {
30253 this: Some(n),
30254 unit: Some(
30255 crate::expressions::IntervalUnitSpec::Simple {
30256 unit: iu,
30257 use_plural: false,
30258 },
30259 ),
30260 },
30261 ));
30262 Ok(Expression::Function(Box::new(Function::new(
30263 "DATE_ADD".to_string(),
30264 vec![x, interval],
30265 ))))
30266 }
30267 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30268 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
30269 let cast_ts = Expression::Cast(Box::new(Cast {
30270 this: x,
30271 to: DataType::Timestamp {
30272 precision: None,
30273 timezone: false,
30274 },
30275 double_colon_syntax: false,
30276 trailing_comments: Vec::new(),
30277 format: None,
30278 default: None,
30279 inferred_type: None,
30280 }));
30281 let cast_date = Expression::Cast(Box::new(Cast {
30282 this: cast_ts,
30283 to: DataType::Date,
30284 double_colon_syntax: false,
30285 trailing_comments: Vec::new(),
30286 format: None,
30287 default: None,
30288 inferred_type: None,
30289 }));
30290 Ok(Expression::Function(Box::new(Function::new(
30291 "DATE_ADD".to_string(),
30292 vec![Expression::string(&unit_str), n, cast_date],
30293 ))))
30294 }
30295 DialectType::DuckDB => {
30296 // CAST(x AS DATE) + INTERVAL n UNIT
30297 let cast_date = Expression::Cast(Box::new(Cast {
30298 this: x,
30299 to: DataType::Date,
30300 double_colon_syntax: false,
30301 trailing_comments: Vec::new(),
30302 format: None,
30303 default: None,
30304 inferred_type: None,
30305 }));
30306 let iu = match unit_str.as_str() {
30307 "YEAR" => crate::expressions::IntervalUnit::Year,
30308 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30309 "MONTH" => crate::expressions::IntervalUnit::Month,
30310 "WEEK" => crate::expressions::IntervalUnit::Week,
30311 "HOUR" => crate::expressions::IntervalUnit::Hour,
30312 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30313 "SECOND" => crate::expressions::IntervalUnit::Second,
30314 _ => crate::expressions::IntervalUnit::Day,
30315 };
30316 let interval = Expression::Interval(Box::new(
30317 crate::expressions::Interval {
30318 this: Some(n),
30319 unit: Some(
30320 crate::expressions::IntervalUnitSpec::Simple {
30321 unit: iu,
30322 use_plural: false,
30323 },
30324 ),
30325 },
30326 ));
30327 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
30328 left: cast_date,
30329 right: interval,
30330 left_comments: Vec::new(),
30331 operator_comments: Vec::new(),
30332 trailing_comments: Vec::new(),
30333 inferred_type: None,
30334 })))
30335 }
30336 DialectType::Drill => {
30337 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
30338 let cast_date = Expression::Cast(Box::new(Cast {
30339 this: x,
30340 to: DataType::Date,
30341 double_colon_syntax: false,
30342 trailing_comments: Vec::new(),
30343 format: None,
30344 default: None,
30345 inferred_type: None,
30346 }));
30347 let iu = match unit_str.as_str() {
30348 "YEAR" => crate::expressions::IntervalUnit::Year,
30349 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30350 "MONTH" => crate::expressions::IntervalUnit::Month,
30351 "WEEK" => crate::expressions::IntervalUnit::Week,
30352 "HOUR" => crate::expressions::IntervalUnit::Hour,
30353 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30354 "SECOND" => crate::expressions::IntervalUnit::Second,
30355 _ => crate::expressions::IntervalUnit::Day,
30356 };
30357 let interval = Expression::Interval(Box::new(
30358 crate::expressions::Interval {
30359 this: Some(n),
30360 unit: Some(
30361 crate::expressions::IntervalUnitSpec::Simple {
30362 unit: iu,
30363 use_plural: false,
30364 },
30365 ),
30366 },
30367 ));
30368 Ok(Expression::Function(Box::new(Function::new(
30369 "DATE_ADD".to_string(),
30370 vec![cast_date, interval],
30371 ))))
30372 }
30373 _ => {
30374 // Default: keep as TS_OR_DS_ADD
30375 Ok(Expression::Function(Box::new(Function::new(
30376 "TS_OR_DS_ADD".to_string(),
30377 vec![x, n, unit_expr],
30378 ))))
30379 }
30380 }
30381 } else {
30382 Ok(Expression::Function(f))
30383 }
30384 } else {
30385 Ok(e)
30386 }
30387 }
30388
30389 Action::DateFromUnixDateConvert => {
30390 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30391 if let Expression::Function(f) = e {
30392 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
30393 if matches!(
30394 target,
30395 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
30396 ) {
30397 return Ok(Expression::Function(Box::new(Function::new(
30398 "DATE_FROM_UNIX_DATE".to_string(),
30399 f.args,
30400 ))));
30401 }
30402 let n = f.args.into_iter().next().unwrap();
30403 let epoch_date = Expression::Cast(Box::new(Cast {
30404 this: Expression::string("1970-01-01"),
30405 to: DataType::Date,
30406 double_colon_syntax: false,
30407 trailing_comments: Vec::new(),
30408 format: None,
30409 default: None,
30410 inferred_type: None,
30411 }));
30412 match target {
30413 DialectType::DuckDB => {
30414 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
30415 let interval =
30416 Expression::Interval(Box::new(crate::expressions::Interval {
30417 this: Some(n),
30418 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30419 unit: crate::expressions::IntervalUnit::Day,
30420 use_plural: false,
30421 }),
30422 }));
30423 Ok(Expression::Add(Box::new(
30424 crate::expressions::BinaryOp::new(epoch_date, interval),
30425 )))
30426 }
30427 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30428 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
30429 Ok(Expression::Function(Box::new(Function::new(
30430 "DATE_ADD".to_string(),
30431 vec![Expression::string("DAY"), n, epoch_date],
30432 ))))
30433 }
30434 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
30435 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30436 Ok(Expression::Function(Box::new(Function::new(
30437 "DATEADD".to_string(),
30438 vec![
30439 Expression::Identifier(Identifier::new("DAY")),
30440 n,
30441 epoch_date,
30442 ],
30443 ))))
30444 }
30445 DialectType::BigQuery => {
30446 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30447 let interval =
30448 Expression::Interval(Box::new(crate::expressions::Interval {
30449 this: Some(n),
30450 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30451 unit: crate::expressions::IntervalUnit::Day,
30452 use_plural: false,
30453 }),
30454 }));
30455 Ok(Expression::Function(Box::new(Function::new(
30456 "DATE_ADD".to_string(),
30457 vec![epoch_date, interval],
30458 ))))
30459 }
30460 DialectType::MySQL
30461 | DialectType::Doris
30462 | DialectType::StarRocks
30463 | DialectType::Drill => {
30464 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30465 let interval =
30466 Expression::Interval(Box::new(crate::expressions::Interval {
30467 this: Some(n),
30468 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30469 unit: crate::expressions::IntervalUnit::Day,
30470 use_plural: false,
30471 }),
30472 }));
30473 Ok(Expression::Function(Box::new(Function::new(
30474 "DATE_ADD".to_string(),
30475 vec![epoch_date, interval],
30476 ))))
30477 }
30478 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30479 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
30480 Ok(Expression::Function(Box::new(Function::new(
30481 "DATE_ADD".to_string(),
30482 vec![epoch_date, n],
30483 ))))
30484 }
30485 DialectType::PostgreSQL
30486 | DialectType::Materialize
30487 | DialectType::RisingWave => {
30488 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
30489 let n_str = match &n {
30490 Expression::Literal(lit)
30491 if matches!(lit.as_ref(), Literal::Number(_)) =>
30492 {
30493 let Literal::Number(s) = lit.as_ref() else {
30494 unreachable!()
30495 };
30496 s.clone()
30497 }
30498 _ => Self::expr_to_string_static(&n),
30499 };
30500 let interval =
30501 Expression::Interval(Box::new(crate::expressions::Interval {
30502 this: Some(Expression::string(&format!("{} DAY", n_str))),
30503 unit: None,
30504 }));
30505 Ok(Expression::Add(Box::new(
30506 crate::expressions::BinaryOp::new(epoch_date, interval),
30507 )))
30508 }
30509 _ => {
30510 // Default: keep as-is
30511 Ok(Expression::Function(Box::new(Function::new(
30512 "DATE_FROM_UNIX_DATE".to_string(),
30513 vec![n],
30514 ))))
30515 }
30516 }
30517 } else {
30518 Ok(e)
30519 }
30520 }
30521
30522 Action::ArrayRemoveConvert => {
30523 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
30524 if let Expression::ArrayRemove(bf) = e {
30525 let arr = bf.this;
30526 let target_val = bf.expression;
30527 match target {
30528 DialectType::DuckDB => {
30529 let u_id = crate::expressions::Identifier::new("_u");
30530 let lambda =
30531 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30532 parameters: vec![u_id.clone()],
30533 body: Expression::Neq(Box::new(BinaryOp {
30534 left: Expression::Identifier(u_id),
30535 right: target_val,
30536 left_comments: Vec::new(),
30537 operator_comments: Vec::new(),
30538 trailing_comments: Vec::new(),
30539 inferred_type: None,
30540 })),
30541 colon: false,
30542 parameter_types: Vec::new(),
30543 }));
30544 Ok(Expression::Function(Box::new(Function::new(
30545 "LIST_FILTER".to_string(),
30546 vec![arr, lambda],
30547 ))))
30548 }
30549 DialectType::ClickHouse => {
30550 let u_id = crate::expressions::Identifier::new("_u");
30551 let lambda =
30552 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30553 parameters: vec![u_id.clone()],
30554 body: Expression::Neq(Box::new(BinaryOp {
30555 left: Expression::Identifier(u_id),
30556 right: target_val,
30557 left_comments: Vec::new(),
30558 operator_comments: Vec::new(),
30559 trailing_comments: Vec::new(),
30560 inferred_type: None,
30561 })),
30562 colon: false,
30563 parameter_types: Vec::new(),
30564 }));
30565 Ok(Expression::Function(Box::new(Function::new(
30566 "arrayFilter".to_string(),
30567 vec![lambda, arr],
30568 ))))
30569 }
30570 DialectType::BigQuery => {
30571 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
30572 let u_id = crate::expressions::Identifier::new("_u");
30573 let u_col =
30574 Expression::Column(Box::new(crate::expressions::Column {
30575 name: u_id.clone(),
30576 table: None,
30577 join_mark: false,
30578 trailing_comments: Vec::new(),
30579 span: None,
30580 inferred_type: None,
30581 }));
30582 let unnest_expr =
30583 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
30584 this: arr,
30585 expressions: Vec::new(),
30586 with_ordinality: false,
30587 alias: None,
30588 offset_alias: None,
30589 }));
30590 let aliased_unnest =
30591 Expression::Alias(Box::new(crate::expressions::Alias {
30592 this: unnest_expr,
30593 alias: u_id.clone(),
30594 column_aliases: Vec::new(),
30595 alias_explicit_as: false,
30596 alias_keyword: None,
30597 pre_alias_comments: Vec::new(),
30598 trailing_comments: Vec::new(),
30599 inferred_type: None,
30600 }));
30601 let where_cond = Expression::Neq(Box::new(BinaryOp {
30602 left: u_col.clone(),
30603 right: target_val,
30604 left_comments: Vec::new(),
30605 operator_comments: Vec::new(),
30606 trailing_comments: Vec::new(),
30607 inferred_type: None,
30608 }));
30609 let subquery = Expression::Select(Box::new(
30610 crate::expressions::Select::new()
30611 .column(u_col)
30612 .from(aliased_unnest)
30613 .where_(where_cond),
30614 ));
30615 Ok(Expression::ArrayFunc(Box::new(
30616 crate::expressions::ArrayConstructor {
30617 expressions: vec![subquery],
30618 bracket_notation: false,
30619 use_list_keyword: false,
30620 },
30621 )))
30622 }
30623 _ => Ok(Expression::ArrayRemove(Box::new(
30624 crate::expressions::BinaryFunc {
30625 original_name: None,
30626 this: arr,
30627 expression: target_val,
30628 inferred_type: None,
30629 },
30630 ))),
30631 }
30632 } else {
30633 Ok(e)
30634 }
30635 }
30636
30637 Action::ArrayReverseConvert => {
30638 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
30639 if let Expression::ArrayReverse(af) = e {
30640 Ok(Expression::Function(Box::new(Function::new(
30641 "arrayReverse".to_string(),
30642 vec![af.this],
30643 ))))
30644 } else {
30645 Ok(e)
30646 }
30647 }
30648
30649 Action::JsonKeysConvert => {
30650 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
30651 if let Expression::JsonKeys(uf) = e {
30652 match target {
30653 DialectType::Spark | DialectType::Databricks => {
30654 Ok(Expression::Function(Box::new(Function::new(
30655 "JSON_OBJECT_KEYS".to_string(),
30656 vec![uf.this],
30657 ))))
30658 }
30659 DialectType::Snowflake => Ok(Expression::Function(Box::new(
30660 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
30661 ))),
30662 _ => Ok(Expression::JsonKeys(uf)),
30663 }
30664 } else {
30665 Ok(e)
30666 }
30667 }
30668
30669 Action::ParseJsonStrip => {
30670 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
30671 if let Expression::ParseJson(uf) = e {
30672 Ok(uf.this)
30673 } else {
30674 Ok(e)
30675 }
30676 }
30677
30678 Action::ArraySizeDrill => {
30679 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
30680 if let Expression::ArraySize(uf) = e {
30681 Ok(Expression::Function(Box::new(Function::new(
30682 "REPEATED_COUNT".to_string(),
30683 vec![uf.this],
30684 ))))
30685 } else {
30686 Ok(e)
30687 }
30688 }
30689
30690 Action::WeekOfYearToWeekIso => {
30691 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
30692 if let Expression::WeekOfYear(uf) = e {
30693 Ok(Expression::Function(Box::new(Function::new(
30694 "WEEKISO".to_string(),
30695 vec![uf.this],
30696 ))))
30697 } else {
30698 Ok(e)
30699 }
30700 }
30701 }
30702 })
30703 }
30704
30705 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
30706 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
30707 use crate::expressions::Function;
30708 match unit {
30709 "DAY" => {
30710 // DATE(x)
30711 Ok(Expression::Function(Box::new(Function::new(
30712 "DATE".to_string(),
30713 vec![expr.clone()],
30714 ))))
30715 }
30716 "WEEK" => {
30717 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
30718 let year_x = Expression::Function(Box::new(Function::new(
30719 "YEAR".to_string(),
30720 vec![expr.clone()],
30721 )));
30722 let week_x = Expression::Function(Box::new(Function::new(
30723 "WEEK".to_string(),
30724 vec![expr.clone(), Expression::number(1)],
30725 )));
30726 let concat_args = vec![
30727 year_x,
30728 Expression::string(" "),
30729 week_x,
30730 Expression::string(" 1"),
30731 ];
30732 let concat = Expression::Function(Box::new(Function::new(
30733 "CONCAT".to_string(),
30734 concat_args,
30735 )));
30736 Ok(Expression::Function(Box::new(Function::new(
30737 "STR_TO_DATE".to_string(),
30738 vec![concat, Expression::string("%Y %u %w")],
30739 ))))
30740 }
30741 "MONTH" => {
30742 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
30743 let year_x = Expression::Function(Box::new(Function::new(
30744 "YEAR".to_string(),
30745 vec![expr.clone()],
30746 )));
30747 let month_x = Expression::Function(Box::new(Function::new(
30748 "MONTH".to_string(),
30749 vec![expr.clone()],
30750 )));
30751 let concat_args = vec![
30752 year_x,
30753 Expression::string(" "),
30754 month_x,
30755 Expression::string(" 1"),
30756 ];
30757 let concat = Expression::Function(Box::new(Function::new(
30758 "CONCAT".to_string(),
30759 concat_args,
30760 )));
30761 Ok(Expression::Function(Box::new(Function::new(
30762 "STR_TO_DATE".to_string(),
30763 vec![concat, Expression::string("%Y %c %e")],
30764 ))))
30765 }
30766 "QUARTER" => {
30767 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
30768 let year_x = Expression::Function(Box::new(Function::new(
30769 "YEAR".to_string(),
30770 vec![expr.clone()],
30771 )));
30772 let quarter_x = Expression::Function(Box::new(Function::new(
30773 "QUARTER".to_string(),
30774 vec![expr.clone()],
30775 )));
30776 // QUARTER(x) * 3 - 2
30777 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
30778 left: quarter_x,
30779 right: Expression::number(3),
30780 left_comments: Vec::new(),
30781 operator_comments: Vec::new(),
30782 trailing_comments: Vec::new(),
30783 inferred_type: None,
30784 }));
30785 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
30786 left: mul,
30787 right: Expression::number(2),
30788 left_comments: Vec::new(),
30789 operator_comments: Vec::new(),
30790 trailing_comments: Vec::new(),
30791 inferred_type: None,
30792 }));
30793 let concat_args = vec![
30794 year_x,
30795 Expression::string(" "),
30796 sub,
30797 Expression::string(" 1"),
30798 ];
30799 let concat = Expression::Function(Box::new(Function::new(
30800 "CONCAT".to_string(),
30801 concat_args,
30802 )));
30803 Ok(Expression::Function(Box::new(Function::new(
30804 "STR_TO_DATE".to_string(),
30805 vec![concat, Expression::string("%Y %c %e")],
30806 ))))
30807 }
30808 "YEAR" => {
30809 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
30810 let year_x = Expression::Function(Box::new(Function::new(
30811 "YEAR".to_string(),
30812 vec![expr.clone()],
30813 )));
30814 let concat_args = vec![year_x, Expression::string(" 1 1")];
30815 let concat = Expression::Function(Box::new(Function::new(
30816 "CONCAT".to_string(),
30817 concat_args,
30818 )));
30819 Ok(Expression::Function(Box::new(Function::new(
30820 "STR_TO_DATE".to_string(),
30821 vec![concat, Expression::string("%Y %c %e")],
30822 ))))
30823 }
30824 _ => {
30825 // Unsupported unit -> keep as DATE_TRUNC
30826 Ok(Expression::Function(Box::new(Function::new(
30827 "DATE_TRUNC".to_string(),
30828 vec![Expression::string(unit), expr.clone()],
30829 ))))
30830 }
30831 }
30832 }
30833
30834 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
30835 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
30836 use crate::expressions::DataType;
30837 match dt {
30838 DataType::VarChar { .. } | DataType::Char { .. } => true,
30839 DataType::Struct { fields, .. } => fields
30840 .iter()
30841 .any(|f| Self::has_varchar_char_type(&f.data_type)),
30842 _ => false,
30843 }
30844 }
30845
30846 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
30847 fn normalize_varchar_to_string(
30848 dt: crate::expressions::DataType,
30849 ) -> crate::expressions::DataType {
30850 use crate::expressions::DataType;
30851 match dt {
30852 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
30853 name: "STRING".to_string(),
30854 },
30855 DataType::Struct { fields, nested } => {
30856 let fields = fields
30857 .into_iter()
30858 .map(|mut f| {
30859 f.data_type = Self::normalize_varchar_to_string(f.data_type);
30860 f
30861 })
30862 .collect();
30863 DataType::Struct { fields, nested }
30864 }
30865 other => other,
30866 }
30867 }
30868
30869 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
30870 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
30871 if let Expression::Literal(ref lit) = expr {
30872 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
30873 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
30874 let trimmed = s.trim();
30875
30876 // Find where digits end and unit text begins
30877 let digit_end = trimmed
30878 .find(|c: char| !c.is_ascii_digit())
30879 .unwrap_or(trimmed.len());
30880 if digit_end == 0 || digit_end == trimmed.len() {
30881 return expr;
30882 }
30883 let num = &trimmed[..digit_end];
30884 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
30885 if unit_text.is_empty() {
30886 return expr;
30887 }
30888
30889 let known_units = [
30890 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
30891 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
30892 ];
30893 if !known_units.contains(&unit_text.as_str()) {
30894 return expr;
30895 }
30896
30897 let unit_str = unit_text.clone();
30898 // Singularize
30899 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
30900 &unit_str[..unit_str.len() - 1]
30901 } else {
30902 &unit_str
30903 };
30904 let unit = unit_singular;
30905
30906 match target {
30907 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30908 // INTERVAL '2' DAY
30909 let iu = match unit {
30910 "DAY" => crate::expressions::IntervalUnit::Day,
30911 "HOUR" => crate::expressions::IntervalUnit::Hour,
30912 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30913 "SECOND" => crate::expressions::IntervalUnit::Second,
30914 "WEEK" => crate::expressions::IntervalUnit::Week,
30915 "MONTH" => crate::expressions::IntervalUnit::Month,
30916 "YEAR" => crate::expressions::IntervalUnit::Year,
30917 _ => return expr,
30918 };
30919 return Expression::Interval(Box::new(crate::expressions::Interval {
30920 this: Some(Expression::string(num)),
30921 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30922 unit: iu,
30923 use_plural: false,
30924 }),
30925 }));
30926 }
30927 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
30928 // INTERVAL '2 DAYS'
30929 let plural = if num != "1" && !unit_str.ends_with('S') {
30930 format!("{} {}S", num, unit)
30931 } else if unit_str.ends_with('S') {
30932 format!("{} {}", num, unit_str)
30933 } else {
30934 format!("{} {}", num, unit)
30935 };
30936 return Expression::Interval(Box::new(crate::expressions::Interval {
30937 this: Some(Expression::string(&plural)),
30938 unit: None,
30939 }));
30940 }
30941 _ => {
30942 // Spark/Databricks/Hive: INTERVAL '1' DAY
30943 let iu = match unit {
30944 "DAY" => crate::expressions::IntervalUnit::Day,
30945 "HOUR" => crate::expressions::IntervalUnit::Hour,
30946 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30947 "SECOND" => crate::expressions::IntervalUnit::Second,
30948 "WEEK" => crate::expressions::IntervalUnit::Week,
30949 "MONTH" => crate::expressions::IntervalUnit::Month,
30950 "YEAR" => crate::expressions::IntervalUnit::Year,
30951 _ => return expr,
30952 };
30953 return Expression::Interval(Box::new(crate::expressions::Interval {
30954 this: Some(Expression::string(num)),
30955 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30956 unit: iu,
30957 use_plural: false,
30958 }),
30959 }));
30960 }
30961 }
30962 }
30963 }
30964 // If it's already an INTERVAL expression, pass through
30965 expr
30966 }
30967
30968 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
30969 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
30970 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
30971 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
30972 fn rewrite_unnest_expansion(
30973 select: &crate::expressions::Select,
30974 target: DialectType,
30975 ) -> Option<crate::expressions::Select> {
30976 use crate::expressions::{
30977 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
30978 UnnestFunc,
30979 };
30980
30981 let index_offset: i64 = match target {
30982 DialectType::Presto | DialectType::Trino => 1,
30983 _ => 0, // BigQuery, Snowflake
30984 };
30985
30986 let if_func_name = match target {
30987 DialectType::Snowflake => "IFF",
30988 _ => "IF",
30989 };
30990
30991 let array_length_func = match target {
30992 DialectType::BigQuery => "ARRAY_LENGTH",
30993 DialectType::Presto | DialectType::Trino => "CARDINALITY",
30994 DialectType::Snowflake => "ARRAY_SIZE",
30995 _ => "ARRAY_LENGTH",
30996 };
30997
30998 let use_table_aliases = matches!(
30999 target,
31000 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
31001 );
31002 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
31003
31004 fn make_col(name: &str, table: Option<&str>) -> Expression {
31005 if let Some(tbl) = table {
31006 Expression::boxed_column(Column {
31007 name: Identifier::new(name.to_string()),
31008 table: Some(Identifier::new(tbl.to_string())),
31009 join_mark: false,
31010 trailing_comments: Vec::new(),
31011 span: None,
31012 inferred_type: None,
31013 })
31014 } else {
31015 Expression::Identifier(Identifier::new(name.to_string()))
31016 }
31017 }
31018
31019 fn make_join(this: Expression) -> Join {
31020 Join {
31021 this,
31022 on: None,
31023 using: Vec::new(),
31024 kind: JoinKind::Cross,
31025 use_inner_keyword: false,
31026 use_outer_keyword: false,
31027 deferred_condition: false,
31028 join_hint: None,
31029 match_condition: None,
31030 pivots: Vec::new(),
31031 comments: Vec::new(),
31032 nesting_group: 0,
31033 directed: false,
31034 }
31035 }
31036
31037 // Collect UNNEST info from SELECT expressions
31038 struct UnnestInfo {
31039 arr_expr: Expression,
31040 col_alias: String,
31041 pos_alias: String,
31042 source_alias: String,
31043 original_expr: Expression,
31044 has_outer_alias: Option<String>,
31045 }
31046
31047 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
31048 let mut col_counter = 0usize;
31049 let mut pos_counter = 1usize;
31050 let mut source_counter = 1usize;
31051
31052 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
31053 match expr {
31054 Expression::Unnest(u) => Some(u.this.clone()),
31055 Expression::Function(f)
31056 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
31057 {
31058 Some(f.args[0].clone())
31059 }
31060 Expression::Alias(a) => extract_unnest_arg(&a.this),
31061 Expression::Add(op)
31062 | Expression::Sub(op)
31063 | Expression::Mul(op)
31064 | Expression::Div(op) => {
31065 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
31066 }
31067 _ => None,
31068 }
31069 }
31070
31071 fn get_alias_name(expr: &Expression) -> Option<String> {
31072 if let Expression::Alias(a) = expr {
31073 Some(a.alias.name.clone())
31074 } else {
31075 None
31076 }
31077 }
31078
31079 for sel_expr in &select.expressions {
31080 if let Some(arr) = extract_unnest_arg(sel_expr) {
31081 col_counter += 1;
31082 pos_counter += 1;
31083 source_counter += 1;
31084
31085 let col_alias = if col_counter == 1 {
31086 "col".to_string()
31087 } else {
31088 format!("col_{}", col_counter)
31089 };
31090 let pos_alias = format!("pos_{}", pos_counter);
31091 let source_alias = format!("_u_{}", source_counter);
31092 let has_outer_alias = get_alias_name(sel_expr);
31093
31094 unnest_infos.push(UnnestInfo {
31095 arr_expr: arr,
31096 col_alias,
31097 pos_alias,
31098 source_alias,
31099 original_expr: sel_expr.clone(),
31100 has_outer_alias,
31101 });
31102 }
31103 }
31104
31105 if unnest_infos.is_empty() {
31106 return None;
31107 }
31108
31109 let series_alias = "pos".to_string();
31110 let series_source_alias = "_u".to_string();
31111 let tbl_ref = if use_table_aliases {
31112 Some(series_source_alias.as_str())
31113 } else {
31114 None
31115 };
31116
31117 // Build new SELECT expressions
31118 let mut new_select_exprs = Vec::new();
31119 for info in &unnest_infos {
31120 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
31121 let src_ref = if use_table_aliases {
31122 Some(info.source_alias.as_str())
31123 } else {
31124 None
31125 };
31126
31127 let pos_col = make_col(&series_alias, tbl_ref);
31128 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
31129 let col_ref = make_col(actual_col_name, src_ref);
31130
31131 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
31132 pos_col.clone(),
31133 unnest_pos_col.clone(),
31134 )));
31135 let mut if_args = vec![eq_cond, col_ref];
31136 if null_third_arg {
31137 if_args.push(Expression::Null(crate::expressions::Null));
31138 }
31139
31140 let if_expr =
31141 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
31142 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
31143
31144 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
31145 final_expr,
31146 Identifier::new(actual_col_name.clone()),
31147 ))));
31148 }
31149
31150 // Build array size expressions for GREATEST
31151 let size_exprs: Vec<Expression> = unnest_infos
31152 .iter()
31153 .map(|info| {
31154 Expression::Function(Box::new(Function::new(
31155 array_length_func.to_string(),
31156 vec![info.arr_expr.clone()],
31157 )))
31158 })
31159 .collect();
31160
31161 let greatest =
31162 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
31163
31164 let series_end = if index_offset == 0 {
31165 Expression::Sub(Box::new(BinaryOp::new(
31166 greatest,
31167 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31168 )))
31169 } else {
31170 greatest
31171 };
31172
31173 // Build the position array source
31174 let series_unnest_expr = match target {
31175 DialectType::BigQuery => {
31176 let gen_array = Expression::Function(Box::new(Function::new(
31177 "GENERATE_ARRAY".to_string(),
31178 vec![
31179 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
31180 series_end,
31181 ],
31182 )));
31183 Expression::Unnest(Box::new(UnnestFunc {
31184 this: gen_array,
31185 expressions: Vec::new(),
31186 with_ordinality: false,
31187 alias: None,
31188 offset_alias: None,
31189 }))
31190 }
31191 DialectType::Presto | DialectType::Trino => {
31192 let sequence = Expression::Function(Box::new(Function::new(
31193 "SEQUENCE".to_string(),
31194 vec![
31195 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31196 series_end,
31197 ],
31198 )));
31199 Expression::Unnest(Box::new(UnnestFunc {
31200 this: sequence,
31201 expressions: Vec::new(),
31202 with_ordinality: false,
31203 alias: None,
31204 offset_alias: None,
31205 }))
31206 }
31207 DialectType::Snowflake => {
31208 let range_end = Expression::Add(Box::new(BinaryOp::new(
31209 Expression::Paren(Box::new(crate::expressions::Paren {
31210 this: series_end,
31211 trailing_comments: Vec::new(),
31212 })),
31213 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31214 )));
31215 let gen_range = Expression::Function(Box::new(Function::new(
31216 "ARRAY_GENERATE_RANGE".to_string(),
31217 vec![
31218 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
31219 range_end,
31220 ],
31221 )));
31222 let flatten_arg =
31223 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
31224 name: Identifier::new("INPUT".to_string()),
31225 value: gen_range,
31226 separator: crate::expressions::NamedArgSeparator::DArrow,
31227 }));
31228 let flatten = Expression::Function(Box::new(Function::new(
31229 "FLATTEN".to_string(),
31230 vec![flatten_arg],
31231 )));
31232 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
31233 }
31234 _ => return None,
31235 };
31236
31237 // Build series alias expression
31238 let series_alias_expr = if use_table_aliases {
31239 let col_aliases = if matches!(target, DialectType::Snowflake) {
31240 vec![
31241 Identifier::new("seq".to_string()),
31242 Identifier::new("key".to_string()),
31243 Identifier::new("path".to_string()),
31244 Identifier::new("index".to_string()),
31245 Identifier::new(series_alias.clone()),
31246 Identifier::new("this".to_string()),
31247 ]
31248 } else {
31249 vec![Identifier::new(series_alias.clone())]
31250 };
31251 Expression::Alias(Box::new(Alias {
31252 this: series_unnest_expr,
31253 alias: Identifier::new(series_source_alias.clone()),
31254 column_aliases: col_aliases,
31255 alias_explicit_as: false,
31256 alias_keyword: None,
31257 pre_alias_comments: Vec::new(),
31258 trailing_comments: Vec::new(),
31259 inferred_type: None,
31260 }))
31261 } else {
31262 Expression::Alias(Box::new(Alias::new(
31263 series_unnest_expr,
31264 Identifier::new(series_alias.clone()),
31265 )))
31266 };
31267
31268 // Build CROSS JOINs for each UNNEST
31269 let mut joins = Vec::new();
31270 for info in &unnest_infos {
31271 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
31272
31273 let unnest_join_expr = match target {
31274 DialectType::BigQuery => {
31275 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
31276 let unnest = UnnestFunc {
31277 this: info.arr_expr.clone(),
31278 expressions: Vec::new(),
31279 with_ordinality: true,
31280 alias: Some(Identifier::new(actual_col_name.clone())),
31281 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
31282 };
31283 Expression::Unnest(Box::new(unnest))
31284 }
31285 DialectType::Presto | DialectType::Trino => {
31286 let unnest = UnnestFunc {
31287 this: info.arr_expr.clone(),
31288 expressions: Vec::new(),
31289 with_ordinality: true,
31290 alias: None,
31291 offset_alias: None,
31292 };
31293 Expression::Alias(Box::new(Alias {
31294 this: Expression::Unnest(Box::new(unnest)),
31295 alias: Identifier::new(info.source_alias.clone()),
31296 column_aliases: vec![
31297 Identifier::new(actual_col_name.clone()),
31298 Identifier::new(info.pos_alias.clone()),
31299 ],
31300 alias_explicit_as: false,
31301 alias_keyword: None,
31302 pre_alias_comments: Vec::new(),
31303 trailing_comments: Vec::new(),
31304 inferred_type: None,
31305 }))
31306 }
31307 DialectType::Snowflake => {
31308 let flatten_arg =
31309 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
31310 name: Identifier::new("INPUT".to_string()),
31311 value: info.arr_expr.clone(),
31312 separator: crate::expressions::NamedArgSeparator::DArrow,
31313 }));
31314 let flatten = Expression::Function(Box::new(Function::new(
31315 "FLATTEN".to_string(),
31316 vec![flatten_arg],
31317 )));
31318 let table_fn = Expression::Function(Box::new(Function::new(
31319 "TABLE".to_string(),
31320 vec![flatten],
31321 )));
31322 Expression::Alias(Box::new(Alias {
31323 this: table_fn,
31324 alias: Identifier::new(info.source_alias.clone()),
31325 column_aliases: vec![
31326 Identifier::new("seq".to_string()),
31327 Identifier::new("key".to_string()),
31328 Identifier::new("path".to_string()),
31329 Identifier::new(info.pos_alias.clone()),
31330 Identifier::new(actual_col_name.clone()),
31331 Identifier::new("this".to_string()),
31332 ],
31333 alias_explicit_as: false,
31334 alias_keyword: None,
31335 pre_alias_comments: Vec::new(),
31336 trailing_comments: Vec::new(),
31337 inferred_type: None,
31338 }))
31339 }
31340 _ => return None,
31341 };
31342
31343 joins.push(make_join(unnest_join_expr));
31344 }
31345
31346 // Build WHERE clause
31347 let mut where_conditions: Vec<Expression> = Vec::new();
31348 for info in &unnest_infos {
31349 let src_ref = if use_table_aliases {
31350 Some(info.source_alias.as_str())
31351 } else {
31352 None
31353 };
31354 let pos_col = make_col(&series_alias, tbl_ref);
31355 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
31356
31357 let arr_size = Expression::Function(Box::new(Function::new(
31358 array_length_func.to_string(),
31359 vec![info.arr_expr.clone()],
31360 )));
31361
31362 let size_ref = if index_offset == 0 {
31363 Expression::Paren(Box::new(crate::expressions::Paren {
31364 this: Expression::Sub(Box::new(BinaryOp::new(
31365 arr_size,
31366 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31367 ))),
31368 trailing_comments: Vec::new(),
31369 }))
31370 } else {
31371 arr_size
31372 };
31373
31374 let eq = Expression::Eq(Box::new(BinaryOp::new(
31375 pos_col.clone(),
31376 unnest_pos_col.clone(),
31377 )));
31378 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
31379 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
31380 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
31381 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
31382 this: and_cond,
31383 trailing_comments: Vec::new(),
31384 }));
31385 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
31386
31387 where_conditions.push(or_cond);
31388 }
31389
31390 let where_expr = if where_conditions.len() == 1 {
31391 // Single condition: no parens needed
31392 where_conditions.into_iter().next().unwrap()
31393 } else {
31394 // Multiple conditions: wrap each OR in parens, then combine with AND
31395 let wrap = |e: Expression| {
31396 Expression::Paren(Box::new(crate::expressions::Paren {
31397 this: e,
31398 trailing_comments: Vec::new(),
31399 }))
31400 };
31401 let mut iter = where_conditions.into_iter();
31402 let first = wrap(iter.next().unwrap());
31403 let second = wrap(iter.next().unwrap());
31404 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
31405 this: Expression::And(Box::new(BinaryOp::new(first, second))),
31406 trailing_comments: Vec::new(),
31407 }));
31408 for cond in iter {
31409 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
31410 }
31411 combined
31412 };
31413
31414 // Build the new SELECT
31415 let mut new_select = select.clone();
31416 new_select.expressions = new_select_exprs;
31417
31418 if new_select.from.is_some() {
31419 let mut all_joins = vec![make_join(series_alias_expr)];
31420 all_joins.extend(joins);
31421 new_select.joins.extend(all_joins);
31422 } else {
31423 new_select.from = Some(From {
31424 expressions: vec![series_alias_expr],
31425 });
31426 new_select.joins.extend(joins);
31427 }
31428
31429 if let Some(ref existing_where) = new_select.where_clause {
31430 let combined = Expression::And(Box::new(BinaryOp::new(
31431 existing_where.this.clone(),
31432 where_expr,
31433 )));
31434 new_select.where_clause = Some(crate::expressions::Where { this: combined });
31435 } else {
31436 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
31437 }
31438
31439 Some(new_select)
31440 }
31441
31442 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
31443 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
31444 match original {
31445 Expression::Unnest(_) => replacement.clone(),
31446 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
31447 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
31448 Expression::Add(op) => {
31449 let left = Self::replace_unnest_with_if(&op.left, replacement);
31450 let right = Self::replace_unnest_with_if(&op.right, replacement);
31451 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
31452 }
31453 Expression::Sub(op) => {
31454 let left = Self::replace_unnest_with_if(&op.left, replacement);
31455 let right = Self::replace_unnest_with_if(&op.right, replacement);
31456 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
31457 }
31458 Expression::Mul(op) => {
31459 let left = Self::replace_unnest_with_if(&op.left, replacement);
31460 let right = Self::replace_unnest_with_if(&op.right, replacement);
31461 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
31462 }
31463 Expression::Div(op) => {
31464 let left = Self::replace_unnest_with_if(&op.left, replacement);
31465 let right = Self::replace_unnest_with_if(&op.right, replacement);
31466 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
31467 }
31468 _ => original.clone(),
31469 }
31470 }
31471
31472 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
31473 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
31474 fn decompose_json_path(path: &str) -> Vec<String> {
31475 let mut parts = Vec::new();
31476 let path = if path.starts_with("$.") {
31477 &path[2..]
31478 } else if path.starts_with('$') {
31479 &path[1..]
31480 } else {
31481 path
31482 };
31483 if path.is_empty() {
31484 return parts;
31485 }
31486 let mut current = String::new();
31487 let chars: Vec<char> = path.chars().collect();
31488 let mut i = 0;
31489 while i < chars.len() {
31490 match chars[i] {
31491 '.' => {
31492 if !current.is_empty() {
31493 parts.push(current.clone());
31494 current.clear();
31495 }
31496 i += 1;
31497 }
31498 '[' => {
31499 if !current.is_empty() {
31500 parts.push(current.clone());
31501 current.clear();
31502 }
31503 i += 1;
31504 let mut bracket_content = String::new();
31505 while i < chars.len() && chars[i] != ']' {
31506 if chars[i] == '"' || chars[i] == '\'' {
31507 let quote = chars[i];
31508 i += 1;
31509 while i < chars.len() && chars[i] != quote {
31510 bracket_content.push(chars[i]);
31511 i += 1;
31512 }
31513 if i < chars.len() {
31514 i += 1;
31515 }
31516 } else {
31517 bracket_content.push(chars[i]);
31518 i += 1;
31519 }
31520 }
31521 if i < chars.len() {
31522 i += 1;
31523 }
31524 if bracket_content != "*" {
31525 parts.push(bracket_content);
31526 }
31527 }
31528 _ => {
31529 current.push(chars[i]);
31530 i += 1;
31531 }
31532 }
31533 }
31534 if !current.is_empty() {
31535 parts.push(current);
31536 }
31537 parts
31538 }
31539
31540 /// Strip `$` prefix from a JSON path, keeping the rest.
31541 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
31542 fn strip_json_dollar_prefix(path: &str) -> String {
31543 if path.starts_with("$.") {
31544 path[2..].to_string()
31545 } else if path.starts_with('$') {
31546 path[1..].to_string()
31547 } else {
31548 path.to_string()
31549 }
31550 }
31551
31552 /// Strip `[*]` wildcards from a JSON path.
31553 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
31554 fn strip_json_wildcards(path: &str) -> String {
31555 path.replace("[*]", "")
31556 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
31557 .trim_end_matches('.')
31558 .to_string()
31559 }
31560
31561 /// Convert bracket notation to dot notation for JSON paths.
31562 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
31563 fn bracket_to_dot_notation(path: &str) -> String {
31564 let mut result = String::new();
31565 let chars: Vec<char> = path.chars().collect();
31566 let mut i = 0;
31567 while i < chars.len() {
31568 if chars[i] == '[' {
31569 // Read bracket content
31570 i += 1;
31571 let mut bracket_content = String::new();
31572 let mut is_quoted = false;
31573 let mut _quote_char = '"';
31574 while i < chars.len() && chars[i] != ']' {
31575 if chars[i] == '"' || chars[i] == '\'' {
31576 is_quoted = true;
31577 _quote_char = chars[i];
31578 i += 1;
31579 while i < chars.len() && chars[i] != _quote_char {
31580 bracket_content.push(chars[i]);
31581 i += 1;
31582 }
31583 if i < chars.len() {
31584 i += 1;
31585 }
31586 } else {
31587 bracket_content.push(chars[i]);
31588 i += 1;
31589 }
31590 }
31591 if i < chars.len() {
31592 i += 1;
31593 } // skip ]
31594 if bracket_content == "*" {
31595 // Keep wildcard as-is
31596 result.push_str("[*]");
31597 } else if is_quoted {
31598 // Quoted bracket -> dot notation with quotes
31599 result.push('.');
31600 result.push('"');
31601 result.push_str(&bracket_content);
31602 result.push('"');
31603 } else {
31604 // Numeric index -> keep as bracket
31605 result.push('[');
31606 result.push_str(&bracket_content);
31607 result.push(']');
31608 }
31609 } else {
31610 result.push(chars[i]);
31611 i += 1;
31612 }
31613 }
31614 result
31615 }
31616
31617 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
31618 /// `$["a b"]` -> `$['a b']`
31619 fn bracket_to_single_quotes(path: &str) -> String {
31620 let mut result = String::new();
31621 let chars: Vec<char> = path.chars().collect();
31622 let mut i = 0;
31623 while i < chars.len() {
31624 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
31625 result.push('[');
31626 result.push('\'');
31627 i += 2; // skip [ and "
31628 while i < chars.len() && chars[i] != '"' {
31629 result.push(chars[i]);
31630 i += 1;
31631 }
31632 if i < chars.len() {
31633 i += 1;
31634 } // skip closing "
31635 result.push('\'');
31636 } else {
31637 result.push(chars[i]);
31638 i += 1;
31639 }
31640 }
31641 result
31642 }
31643
31644 /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
31645 /// or PostgreSQL #temp -> TEMPORARY.
31646 /// Also strips # from INSERT INTO #table for non-TSQL targets.
31647 fn transform_select_into(
31648 expr: Expression,
31649 _source: DialectType,
31650 target: DialectType,
31651 ) -> Expression {
31652 use crate::expressions::{CreateTable, Expression, TableRef};
31653
31654 // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
31655 if let Expression::Insert(ref insert) = expr {
31656 if insert.table.name.name.starts_with('#')
31657 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
31658 {
31659 let mut new_insert = insert.clone();
31660 new_insert.table.name.name =
31661 insert.table.name.name.trim_start_matches('#').to_string();
31662 return Expression::Insert(new_insert);
31663 }
31664 return expr;
31665 }
31666
31667 if let Expression::Select(ref select) = expr {
31668 if let Some(ref into) = select.into {
31669 let table_name_raw = match &into.this {
31670 Expression::Table(tr) => tr.name.name.clone(),
31671 Expression::Identifier(id) => id.name.clone(),
31672 _ => String::new(),
31673 };
31674 let is_temp = table_name_raw.starts_with('#') || into.temporary;
31675 let clean_name = table_name_raw.trim_start_matches('#').to_string();
31676
31677 match target {
31678 DialectType::DuckDB | DialectType::Snowflake => {
31679 // SELECT INTO -> CREATE TABLE AS SELECT
31680 let mut new_select = select.clone();
31681 new_select.into = None;
31682 let ct = CreateTable {
31683 name: TableRef::new(clean_name),
31684 on_cluster: None,
31685 columns: Vec::new(),
31686 constraints: Vec::new(),
31687 if_not_exists: false,
31688 temporary: is_temp,
31689 or_replace: false,
31690 table_modifier: None,
31691 as_select: Some(Expression::Select(new_select)),
31692 as_select_parenthesized: false,
31693 on_commit: None,
31694 clone_source: None,
31695 clone_at_clause: None,
31696 shallow_clone: false,
31697 deep_clone: false,
31698 is_copy: false,
31699 leading_comments: Vec::new(),
31700 with_properties: Vec::new(),
31701 teradata_post_name_options: Vec::new(),
31702 with_data: None,
31703 with_statistics: None,
31704 teradata_indexes: Vec::new(),
31705 with_cte: None,
31706 properties: Vec::new(),
31707 partition_of: None,
31708 post_table_properties: Vec::new(),
31709 mysql_table_options: Vec::new(),
31710 inherits: Vec::new(),
31711 on_property: None,
31712 copy_grants: false,
31713 using_template: None,
31714 rollup: None,
31715 uuid: None,
31716 with_partition_columns: Vec::new(),
31717 with_connection: None,
31718 };
31719 return Expression::CreateTable(Box::new(ct));
31720 }
31721 DialectType::PostgreSQL | DialectType::Redshift => {
31722 // PostgreSQL: #foo -> INTO TEMPORARY foo
31723 if is_temp && !into.temporary {
31724 let mut new_select = select.clone();
31725 let mut new_into = into.clone();
31726 new_into.temporary = true;
31727 new_into.unlogged = false;
31728 new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
31729 new_select.into = Some(new_into);
31730 Expression::Select(new_select)
31731 } else {
31732 expr
31733 }
31734 }
31735 _ => expr,
31736 }
31737 } else {
31738 expr
31739 }
31740 } else {
31741 expr
31742 }
31743 }
31744
31745 /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
31746 /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
31747 fn transform_create_table_properties(
31748 ct: &mut crate::expressions::CreateTable,
31749 _source: DialectType,
31750 target: DialectType,
31751 ) {
31752 use crate::expressions::{
31753 BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
31754 Properties,
31755 };
31756
31757 // Helper to convert a raw property value string to the correct Expression
31758 let value_to_expr = |v: &str| -> Expression {
31759 let trimmed = v.trim();
31760 // Check if it's a quoted string (starts and ends with ')
31761 if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
31762 Expression::Literal(Box::new(Literal::String(
31763 trimmed[1..trimmed.len() - 1].to_string(),
31764 )))
31765 }
31766 // Check if it's a number
31767 else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
31768 Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
31769 }
31770 // Check if it's ARRAY[...] or ARRAY(...)
31771 else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
31772 // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
31773 let inner = trimmed
31774 .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
31775 .trim_start_matches('[')
31776 .trim_start_matches('(')
31777 .trim_end_matches(']')
31778 .trim_end_matches(')');
31779 let elements: Vec<Expression> = inner
31780 .split(',')
31781 .map(|e| {
31782 let elem = e.trim().trim_matches('\'');
31783 Expression::Literal(Box::new(Literal::String(elem.to_string())))
31784 })
31785 .collect();
31786 Expression::Function(Box::new(crate::expressions::Function::new(
31787 "ARRAY".to_string(),
31788 elements,
31789 )))
31790 }
31791 // Otherwise, just output as identifier (unquoted)
31792 else {
31793 Expression::Identifier(Identifier::new(trimmed.to_string()))
31794 }
31795 };
31796
31797 if ct.with_properties.is_empty() && ct.properties.is_empty() {
31798 return;
31799 }
31800
31801 // Handle Presto-style WITH properties
31802 if !ct.with_properties.is_empty() {
31803 // Extract FORMAT property and remaining properties
31804 let mut format_value: Option<String> = None;
31805 let mut partitioned_by: Option<String> = None;
31806 let mut other_props: Vec<(String, String)> = Vec::new();
31807
31808 for (key, value) in ct.with_properties.drain(..) {
31809 if key.eq_ignore_ascii_case("FORMAT") {
31810 // Strip surrounding quotes from value if present
31811 format_value = Some(value.trim_matches('\'').to_string());
31812 } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
31813 partitioned_by = Some(value);
31814 } else {
31815 other_props.push((key, value));
31816 }
31817 }
31818
31819 match target {
31820 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31821 // Presto: keep WITH properties but lowercase 'format' key
31822 if let Some(fmt) = format_value {
31823 ct.with_properties
31824 .push(("format".to_string(), format!("'{}'", fmt)));
31825 }
31826 if let Some(part) = partitioned_by {
31827 // Convert (col1, col2) to ARRAY['col1', 'col2'] format
31828 let trimmed = part.trim();
31829 let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
31830 // Also handle ARRAY['...'] format - keep as-is
31831 if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
31832 ct.with_properties
31833 .push(("PARTITIONED_BY".to_string(), part));
31834 } else {
31835 // Parse column names from the parenthesized list
31836 let cols: Vec<&str> = inner
31837 .split(',')
31838 .map(|c| c.trim().trim_matches('"').trim_matches('\''))
31839 .collect();
31840 let array_val = format!(
31841 "ARRAY[{}]",
31842 cols.iter()
31843 .map(|c| format!("'{}'", c))
31844 .collect::<Vec<_>>()
31845 .join(", ")
31846 );
31847 ct.with_properties
31848 .push(("PARTITIONED_BY".to_string(), array_val));
31849 }
31850 }
31851 ct.with_properties.extend(other_props);
31852 }
31853 DialectType::Hive => {
31854 // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
31855 if let Some(fmt) = format_value {
31856 ct.properties.push(Expression::FileFormatProperty(Box::new(
31857 FileFormatProperty {
31858 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
31859 expressions: vec![],
31860 hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
31861 value: true,
31862 }))),
31863 },
31864 )));
31865 }
31866 if let Some(_part) = partitioned_by {
31867 // PARTITIONED_BY handling is complex - move columns to partitioned by
31868 // For now, the partition columns are extracted from the column list
31869 Self::apply_partitioned_by(ct, &_part, target);
31870 }
31871 if !other_props.is_empty() {
31872 let eq_exprs: Vec<Expression> = other_props
31873 .into_iter()
31874 .map(|(k, v)| {
31875 Expression::Eq(Box::new(BinaryOp::new(
31876 Expression::Literal(Box::new(Literal::String(k))),
31877 value_to_expr(&v),
31878 )))
31879 })
31880 .collect();
31881 ct.properties
31882 .push(Expression::Properties(Box::new(Properties {
31883 expressions: eq_exprs,
31884 })));
31885 }
31886 }
31887 DialectType::Spark | DialectType::Databricks => {
31888 // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
31889 if let Some(fmt) = format_value {
31890 ct.properties.push(Expression::FileFormatProperty(Box::new(
31891 FileFormatProperty {
31892 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
31893 expressions: vec![],
31894 hive_format: None, // None means USING syntax
31895 },
31896 )));
31897 }
31898 if let Some(_part) = partitioned_by {
31899 Self::apply_partitioned_by(ct, &_part, target);
31900 }
31901 if !other_props.is_empty() {
31902 let eq_exprs: Vec<Expression> = other_props
31903 .into_iter()
31904 .map(|(k, v)| {
31905 Expression::Eq(Box::new(BinaryOp::new(
31906 Expression::Literal(Box::new(Literal::String(k))),
31907 value_to_expr(&v),
31908 )))
31909 })
31910 .collect();
31911 ct.properties
31912 .push(Expression::Properties(Box::new(Properties {
31913 expressions: eq_exprs,
31914 })));
31915 }
31916 }
31917 DialectType::DuckDB => {
31918 // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
31919 // Keep nothing
31920 }
31921 _ => {
31922 // For other dialects, keep WITH properties as-is
31923 if let Some(fmt) = format_value {
31924 ct.with_properties
31925 .push(("FORMAT".to_string(), format!("'{}'", fmt)));
31926 }
31927 if let Some(part) = partitioned_by {
31928 ct.with_properties
31929 .push(("PARTITIONED_BY".to_string(), part));
31930 }
31931 ct.with_properties.extend(other_props);
31932 }
31933 }
31934 }
31935
31936 // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
31937 // and Hive STORED AS -> Presto WITH (format=...) conversion
31938 if !ct.properties.is_empty() {
31939 let is_presto_target = matches!(
31940 target,
31941 DialectType::Presto | DialectType::Trino | DialectType::Athena
31942 );
31943 let is_duckdb_target = matches!(target, DialectType::DuckDB);
31944
31945 if is_presto_target || is_duckdb_target {
31946 let mut new_properties = Vec::new();
31947 for prop in ct.properties.drain(..) {
31948 match &prop {
31949 Expression::FileFormatProperty(ffp) => {
31950 if is_presto_target {
31951 // Convert STORED AS/USING to WITH (format=...)
31952 if let Some(ref fmt_expr) = ffp.this {
31953 let fmt_str = match fmt_expr.as_ref() {
31954 Expression::Identifier(id) => id.name.clone(),
31955 Expression::Literal(lit)
31956 if matches!(lit.as_ref(), Literal::String(_)) =>
31957 {
31958 let Literal::String(s) = lit.as_ref() else {
31959 unreachable!()
31960 };
31961 s.clone()
31962 }
31963 _ => {
31964 new_properties.push(prop);
31965 continue;
31966 }
31967 };
31968 ct.with_properties
31969 .push(("format".to_string(), format!("'{}'", fmt_str)));
31970 }
31971 }
31972 // DuckDB: just strip file format properties
31973 }
31974 // Convert TBLPROPERTIES to WITH properties for Presto target
31975 Expression::Properties(props) if is_presto_target => {
31976 for expr in &props.expressions {
31977 if let Expression::Eq(eq) = expr {
31978 // Extract key and value from the Eq expression
31979 let key = match &eq.left {
31980 Expression::Literal(lit)
31981 if matches!(lit.as_ref(), Literal::String(_)) =>
31982 {
31983 let Literal::String(s) = lit.as_ref() else {
31984 unreachable!()
31985 };
31986 s.clone()
31987 }
31988 Expression::Identifier(id) => id.name.clone(),
31989 _ => continue,
31990 };
31991 let value = match &eq.right {
31992 Expression::Literal(lit)
31993 if matches!(lit.as_ref(), Literal::String(_)) =>
31994 {
31995 let Literal::String(s) = lit.as_ref() else {
31996 unreachable!()
31997 };
31998 format!("'{}'", s)
31999 }
32000 Expression::Literal(lit)
32001 if matches!(lit.as_ref(), Literal::Number(_)) =>
32002 {
32003 let Literal::Number(n) = lit.as_ref() else {
32004 unreachable!()
32005 };
32006 n.clone()
32007 }
32008 Expression::Identifier(id) => id.name.clone(),
32009 _ => continue,
32010 };
32011 ct.with_properties.push((key, value));
32012 }
32013 }
32014 }
32015 // Convert PartitionedByProperty for Presto target
32016 Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
32017 // Check if it contains ColumnDef expressions (Hive-style with types)
32018 if let Expression::Tuple(ref tuple) = *pbp.this {
32019 let mut col_names: Vec<String> = Vec::new();
32020 let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
32021 let mut has_col_defs = false;
32022 for expr in &tuple.expressions {
32023 if let Expression::ColumnDef(ref cd) = expr {
32024 has_col_defs = true;
32025 col_names.push(cd.name.name.clone());
32026 col_defs.push(*cd.clone());
32027 } else if let Expression::Column(ref col) = expr {
32028 col_names.push(col.name.name.clone());
32029 } else if let Expression::Identifier(ref id) = expr {
32030 col_names.push(id.name.clone());
32031 } else {
32032 // For function expressions like MONTHS(y), serialize to SQL
32033 let generic = Dialect::get(DialectType::Generic);
32034 if let Ok(sql) = generic.generate(expr) {
32035 col_names.push(sql);
32036 }
32037 }
32038 }
32039 if has_col_defs {
32040 // Merge partition column defs into the main column list
32041 for cd in col_defs {
32042 ct.columns.push(cd);
32043 }
32044 }
32045 if !col_names.is_empty() {
32046 // Add PARTITIONED_BY property
32047 let array_val = format!(
32048 "ARRAY[{}]",
32049 col_names
32050 .iter()
32051 .map(|n| format!("'{}'", n))
32052 .collect::<Vec<_>>()
32053 .join(", ")
32054 );
32055 ct.with_properties
32056 .push(("PARTITIONED_BY".to_string(), array_val));
32057 }
32058 }
32059 // Skip - don't keep in properties
32060 }
32061 _ => {
32062 if !is_duckdb_target {
32063 new_properties.push(prop);
32064 }
32065 }
32066 }
32067 }
32068 ct.properties = new_properties;
32069 } else {
32070 // For Hive/Spark targets, unquote format names in STORED AS
32071 for prop in &mut ct.properties {
32072 if let Expression::FileFormatProperty(ref mut ffp) = prop {
32073 if let Some(ref mut fmt_expr) = ffp.this {
32074 if let Expression::Literal(lit) = fmt_expr.as_ref() {
32075 if let Literal::String(s) = lit.as_ref() {
32076 // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
32077 let unquoted = s.clone();
32078 *fmt_expr =
32079 Box::new(Expression::Identifier(Identifier::new(unquoted)));
32080 }
32081 }
32082 }
32083 }
32084 }
32085 }
32086 }
32087 }
32088
32089 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
32090 fn apply_partitioned_by(
32091 ct: &mut crate::expressions::CreateTable,
32092 partitioned_by_value: &str,
32093 target: DialectType,
32094 ) {
32095 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
32096
32097 // Parse the ARRAY['col1', 'col2'] value to extract column names
32098 let mut col_names: Vec<String> = Vec::new();
32099 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
32100 let inner = partitioned_by_value
32101 .trim()
32102 .trim_start_matches("ARRAY")
32103 .trim_start_matches('[')
32104 .trim_start_matches('(')
32105 .trim_end_matches(']')
32106 .trim_end_matches(')');
32107 for part in inner.split(',') {
32108 let col = part.trim().trim_matches('\'').trim_matches('"');
32109 if !col.is_empty() {
32110 col_names.push(col.to_string());
32111 }
32112 }
32113
32114 if col_names.is_empty() {
32115 return;
32116 }
32117
32118 if matches!(target, DialectType::Hive) {
32119 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
32120 let mut partition_col_defs = Vec::new();
32121 for col_name in &col_names {
32122 // Find and remove from columns
32123 if let Some(pos) = ct
32124 .columns
32125 .iter()
32126 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
32127 {
32128 let col_def = ct.columns.remove(pos);
32129 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
32130 }
32131 }
32132 if !partition_col_defs.is_empty() {
32133 ct.properties
32134 .push(Expression::PartitionedByProperty(Box::new(
32135 PartitionedByProperty {
32136 this: Box::new(Expression::Tuple(Box::new(Tuple {
32137 expressions: partition_col_defs,
32138 }))),
32139 },
32140 )));
32141 }
32142 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
32143 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
32144 // Use quoted identifiers to match the quoting style of the original column definitions
32145 let partition_exprs: Vec<Expression> = col_names
32146 .iter()
32147 .map(|name| {
32148 // Check if the column exists in the column list and use its quoting
32149 let is_quoted = ct
32150 .columns
32151 .iter()
32152 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
32153 let ident = if is_quoted {
32154 Identifier::quoted(name.clone())
32155 } else {
32156 Identifier::new(name.clone())
32157 };
32158 Expression::boxed_column(Column {
32159 name: ident,
32160 table: None,
32161 join_mark: false,
32162 trailing_comments: Vec::new(),
32163 span: None,
32164 inferred_type: None,
32165 })
32166 })
32167 .collect();
32168 ct.properties
32169 .push(Expression::PartitionedByProperty(Box::new(
32170 PartitionedByProperty {
32171 this: Box::new(Expression::Tuple(Box::new(Tuple {
32172 expressions: partition_exprs,
32173 }))),
32174 },
32175 )));
32176 }
32177 // DuckDB: strip partitioned_by entirely (already handled)
32178 }
32179
32180 /// Convert a DataType to Spark's type string format (using angle brackets)
32181 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
32182 use crate::expressions::DataType;
32183 match dt {
32184 DataType::Int { .. } => "INT".to_string(),
32185 DataType::BigInt { .. } => "BIGINT".to_string(),
32186 DataType::SmallInt { .. } => "SMALLINT".to_string(),
32187 DataType::TinyInt { .. } => "TINYINT".to_string(),
32188 DataType::Float { .. } => "FLOAT".to_string(),
32189 DataType::Double { .. } => "DOUBLE".to_string(),
32190 DataType::Decimal {
32191 precision: Some(p),
32192 scale: Some(s),
32193 } => format!("DECIMAL({}, {})", p, s),
32194 DataType::Decimal {
32195 precision: Some(p), ..
32196 } => format!("DECIMAL({})", p),
32197 DataType::Decimal { .. } => "DECIMAL".to_string(),
32198 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
32199 "STRING".to_string()
32200 }
32201 DataType::Char { .. } => "STRING".to_string(),
32202 DataType::Boolean => "BOOLEAN".to_string(),
32203 DataType::Date => "DATE".to_string(),
32204 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
32205 DataType::Json | DataType::JsonB => "STRING".to_string(),
32206 DataType::Binary { .. } => "BINARY".to_string(),
32207 DataType::Array { element_type, .. } => {
32208 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
32209 }
32210 DataType::Map {
32211 key_type,
32212 value_type,
32213 } => format!(
32214 "MAP<{}, {}>",
32215 Self::data_type_to_spark_string(key_type),
32216 Self::data_type_to_spark_string(value_type)
32217 ),
32218 DataType::Struct { fields, .. } => {
32219 let field_strs: Vec<String> = fields
32220 .iter()
32221 .map(|f| {
32222 if f.name.is_empty() {
32223 Self::data_type_to_spark_string(&f.data_type)
32224 } else {
32225 format!(
32226 "{}: {}",
32227 f.name,
32228 Self::data_type_to_spark_string(&f.data_type)
32229 )
32230 }
32231 })
32232 .collect();
32233 format!("STRUCT<{}>", field_strs.join(", "))
32234 }
32235 DataType::Custom { name } => name.clone(),
32236 _ => format!("{:?}", dt),
32237 }
32238 }
32239
32240 /// Extract value and unit from an Interval expression
32241 /// Returns (value_expression, IntervalUnit)
32242 fn extract_interval_parts(
32243 interval_expr: &Expression,
32244 ) -> Option<(Expression, crate::expressions::IntervalUnit)> {
32245 use crate::expressions::{DataType, IntervalUnit, IntervalUnitSpec, Literal};
32246
32247 fn unit_from_str(unit: &str) -> Option<IntervalUnit> {
32248 match unit.trim().to_ascii_uppercase().as_str() {
32249 "YEAR" | "YEARS" => Some(IntervalUnit::Year),
32250 "QUARTER" | "QUARTERS" => Some(IntervalUnit::Quarter),
32251 "MONTH" | "MONTHS" | "MON" | "MONS" | "MM" => Some(IntervalUnit::Month),
32252 "WEEK" | "WEEKS" | "ISOWEEK" => Some(IntervalUnit::Week),
32253 "DAY" | "DAYS" => Some(IntervalUnit::Day),
32254 "HOUR" | "HOURS" => Some(IntervalUnit::Hour),
32255 "MINUTE" | "MINUTES" => Some(IntervalUnit::Minute),
32256 "SECOND" | "SECONDS" => Some(IntervalUnit::Second),
32257 "MILLISECOND" | "MILLISECONDS" => Some(IntervalUnit::Millisecond),
32258 "MICROSECOND" | "MICROSECONDS" => Some(IntervalUnit::Microsecond),
32259 "NANOSECOND" | "NANOSECONDS" => Some(IntervalUnit::Nanosecond),
32260 _ => None,
32261 }
32262 }
32263
32264 fn parts_from_literal_string(s: &str) -> Option<(Expression, IntervalUnit)> {
32265 let mut parts = s.split_whitespace();
32266 let value = parts.next()?;
32267 let unit = unit_from_str(parts.next()?)?;
32268 Some((
32269 Expression::Literal(Box::new(Literal::String(value.to_string()))),
32270 unit,
32271 ))
32272 }
32273
32274 fn unit_from_spec(unit: &IntervalUnitSpec) -> Option<IntervalUnit> {
32275 match unit {
32276 IntervalUnitSpec::Simple { unit, .. } => Some(*unit),
32277 IntervalUnitSpec::Expr(expr) => match expr.as_ref() {
32278 Expression::Day(_) => Some(IntervalUnit::Day),
32279 Expression::Month(_) => Some(IntervalUnit::Month),
32280 Expression::Year(_) => Some(IntervalUnit::Year),
32281 Expression::Identifier(id) => unit_from_str(&id.name),
32282 Expression::Var(v) => unit_from_str(&v.this),
32283 Expression::Column(col) => unit_from_str(&col.name.name),
32284 _ => None,
32285 },
32286 _ => None,
32287 }
32288 }
32289
32290 match interval_expr {
32291 Expression::Interval(iv) => {
32292 let val = iv.this.clone().unwrap_or(Expression::number(0));
32293 if let Expression::Literal(lit) = &val {
32294 if let Literal::String(s) = lit.as_ref() {
32295 if let Some(parts) = parts_from_literal_string(s) {
32296 return Some(parts);
32297 }
32298 }
32299 }
32300 let unit = iv
32301 .unit
32302 .as_ref()
32303 .and_then(unit_from_spec)
32304 .unwrap_or(IntervalUnit::Day);
32305 Some((val, unit))
32306 }
32307 Expression::Cast(cast) if matches!(cast.to, DataType::Interval { .. }) => {
32308 if let Expression::Literal(lit) = &cast.this {
32309 if let Literal::String(s) = lit.as_ref() {
32310 if let Some(parts) = parts_from_literal_string(s) {
32311 return Some(parts);
32312 }
32313 }
32314 }
32315 let unit = match &cast.to {
32316 DataType::Interval {
32317 unit: Some(unit), ..
32318 } => unit_from_str(unit).unwrap_or(IntervalUnit::Day),
32319 _ => IntervalUnit::Day,
32320 };
32321 Some((cast.this.clone(), unit))
32322 }
32323 _ => None,
32324 }
32325 }
32326
32327 fn rewrite_tsql_interval_arithmetic(expr: &Expression) -> Option<Expression> {
32328 match expr {
32329 Expression::Add(op) => {
32330 Self::extract_interval_parts(&op.right)?;
32331 Some(Self::build_tsql_dateadd_from_interval(
32332 op.left.clone(),
32333 &op.right,
32334 false,
32335 ))
32336 }
32337 Expression::Sub(op) => {
32338 Self::extract_interval_parts(&op.right)?;
32339 Some(Self::build_tsql_dateadd_from_interval(
32340 op.left.clone(),
32341 &op.right,
32342 true,
32343 ))
32344 }
32345 _ => None,
32346 }
32347 }
32348
32349 fn build_tsql_dateadd_from_interval(
32350 date: Expression,
32351 interval: &Expression,
32352 subtract: bool,
32353 ) -> Expression {
32354 let (value, unit) = Self::extract_interval_parts(interval)
32355 .unwrap_or_else(|| (interval.clone(), crate::expressions::IntervalUnit::Day));
32356 let unit = Self::interval_unit_to_string(&unit);
32357 let amount = Self::tsql_dateadd_amount(value, subtract);
32358
32359 Expression::Function(Box::new(Function::new(
32360 "DATEADD".to_string(),
32361 vec![Expression::Identifier(Identifier::new(unit)), amount, date],
32362 )))
32363 }
32364
32365 fn tsql_dateadd_amount(value: Expression, negate: bool) -> Expression {
32366 use crate::expressions::{Parameter, ParameterStyle, UnaryOp};
32367
32368 fn numeric_literal_value(value: &Expression) -> Option<&str> {
32369 match value {
32370 Expression::Literal(lit) => match lit.as_ref() {
32371 crate::expressions::Literal::Number(n)
32372 | crate::expressions::Literal::String(n) => Some(n.as_str()),
32373 _ => None,
32374 },
32375 _ => None,
32376 }
32377 }
32378
32379 fn colon_parameter(value: &Expression) -> Option<Expression> {
32380 let Expression::Literal(lit) = value else {
32381 return None;
32382 };
32383 let crate::expressions::Literal::String(s) = lit.as_ref() else {
32384 return None;
32385 };
32386 let name = s.strip_prefix(':')?;
32387 if name.is_empty()
32388 || !name
32389 .chars()
32390 .all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
32391 {
32392 return None;
32393 }
32394
32395 Some(Expression::Parameter(Box::new(Parameter {
32396 name: if name.chars().all(|ch| ch.is_ascii_digit()) {
32397 None
32398 } else {
32399 Some(name.to_string())
32400 },
32401 index: name.parse::<u32>().ok(),
32402 style: ParameterStyle::Colon,
32403 quoted: false,
32404 string_quoted: false,
32405 expression: None,
32406 })))
32407 }
32408
32409 let value = colon_parameter(&value).unwrap_or(value);
32410
32411 if let Some(n) = numeric_literal_value(&value) {
32412 if let Ok(parsed) = n.parse::<f64>() {
32413 let normalized = if negate { -parsed } else { parsed };
32414 let rendered = if normalized.fract() == 0.0 {
32415 format!("{}", normalized as i64)
32416 } else {
32417 normalized.to_string()
32418 };
32419 return Expression::Literal(Box::new(crate::expressions::Literal::Number(
32420 rendered,
32421 )));
32422 }
32423 }
32424
32425 if !negate {
32426 return value;
32427 }
32428
32429 match value {
32430 Expression::Neg(op) => op.this,
32431 other => Expression::Neg(Box::new(UnaryOp {
32432 this: other,
32433 inferred_type: None,
32434 })),
32435 }
32436 }
32437
32438 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
32439 fn normalize_bigquery_function(
32440 e: Expression,
32441 source: DialectType,
32442 target: DialectType,
32443 ) -> Result<Expression> {
32444 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
32445
32446 let f = if let Expression::Function(f) = e {
32447 *f
32448 } else {
32449 return Ok(e);
32450 };
32451 let name = f.name.to_ascii_uppercase();
32452 let mut args = f.args;
32453
32454 /// Helper to extract unit string from an identifier, column, or literal expression
32455 fn get_unit_str(expr: &Expression) -> String {
32456 match expr {
32457 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
32458 Expression::Var(v) => v.this.to_ascii_uppercase(),
32459 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
32460 let Literal::String(s) = lit.as_ref() else {
32461 unreachable!()
32462 };
32463 s.to_ascii_uppercase()
32464 }
32465 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
32466 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
32467 Expression::Function(f) => {
32468 let base = f.name.to_ascii_uppercase();
32469 if !f.args.is_empty() {
32470 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
32471 let inner = get_unit_str(&f.args[0]);
32472 format!("{}({})", base, inner)
32473 } else {
32474 base
32475 }
32476 }
32477 _ => "DAY".to_string(),
32478 }
32479 }
32480
32481 /// Parse unit string to IntervalUnit
32482 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
32483 match s {
32484 "YEAR" => crate::expressions::IntervalUnit::Year,
32485 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
32486 "MONTH" => crate::expressions::IntervalUnit::Month,
32487 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
32488 "DAY" => crate::expressions::IntervalUnit::Day,
32489 "HOUR" => crate::expressions::IntervalUnit::Hour,
32490 "MINUTE" => crate::expressions::IntervalUnit::Minute,
32491 "SECOND" => crate::expressions::IntervalUnit::Second,
32492 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
32493 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
32494 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
32495 _ => crate::expressions::IntervalUnit::Day,
32496 }
32497 }
32498
32499 match name.as_str() {
32500 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
32501 // (BigQuery: result = date1 - date2, Standard: result = end - start)
32502 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
32503 let date1 = args.remove(0);
32504 let date2 = args.remove(0);
32505 let unit_expr = args.remove(0);
32506 let unit_str = get_unit_str(&unit_expr);
32507
32508 if matches!(target, DialectType::BigQuery) {
32509 // BigQuery -> BigQuery: just uppercase the unit
32510 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
32511 return Ok(Expression::Function(Box::new(Function::new(
32512 f.name,
32513 vec![date1, date2, unit],
32514 ))));
32515 }
32516
32517 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
32518 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
32519 if matches!(target, DialectType::Snowflake) {
32520 return Ok(Expression::TimestampDiff(Box::new(
32521 crate::expressions::TimestampDiff {
32522 this: Box::new(date2),
32523 expression: Box::new(date1),
32524 unit: Some(unit_str),
32525 },
32526 )));
32527 }
32528
32529 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
32530 if matches!(target, DialectType::DuckDB) {
32531 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
32532 // CAST to TIME
32533 let cast_fn = |e: Expression| -> Expression {
32534 match e {
32535 Expression::Literal(lit)
32536 if matches!(lit.as_ref(), Literal::String(_)) =>
32537 {
32538 let Literal::String(s) = lit.as_ref() else {
32539 unreachable!()
32540 };
32541 Expression::Cast(Box::new(Cast {
32542 this: Expression::Literal(Box::new(Literal::String(
32543 s.clone(),
32544 ))),
32545 to: DataType::Custom {
32546 name: "TIME".to_string(),
32547 },
32548 trailing_comments: vec![],
32549 double_colon_syntax: false,
32550 format: None,
32551 default: None,
32552 inferred_type: None,
32553 }))
32554 }
32555 other => other,
32556 }
32557 };
32558 (cast_fn(date1), cast_fn(date2))
32559 } else if name == "DATETIME_DIFF" {
32560 // CAST to TIMESTAMP
32561 (
32562 Self::ensure_cast_timestamp(date1),
32563 Self::ensure_cast_timestamp(date2),
32564 )
32565 } else {
32566 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
32567 (
32568 Self::ensure_cast_timestamptz(date1),
32569 Self::ensure_cast_timestamptz(date2),
32570 )
32571 };
32572 return Ok(Expression::Function(Box::new(Function::new(
32573 "DATE_DIFF".to_string(),
32574 vec![
32575 Expression::Literal(Box::new(Literal::String(unit_str))),
32576 cast_d2,
32577 cast_d1,
32578 ],
32579 ))));
32580 }
32581
32582 // Convert to standard TIMESTAMPDIFF(unit, start, end)
32583 let unit = Expression::Identifier(Identifier::new(unit_str));
32584 Ok(Expression::Function(Box::new(Function::new(
32585 "TIMESTAMPDIFF".to_string(),
32586 vec![unit, date2, date1],
32587 ))))
32588 }
32589
32590 // DATEDIFF(unit, start, end) -> target-specific form
32591 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
32592 "DATEDIFF" if args.len() == 3 => {
32593 let arg0 = args.remove(0);
32594 let arg1 = args.remove(0);
32595 let arg2 = args.remove(0);
32596 let unit_str = get_unit_str(&arg0);
32597
32598 // Redshift DATEDIFF(unit, start, end) order: result = end - start
32599 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
32600 // TSQL DATEDIFF(unit, start, end) order: result = end - start
32601
32602 if matches!(target, DialectType::Snowflake) {
32603 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
32604 let unit = Expression::Identifier(Identifier::new(unit_str));
32605 return Ok(Expression::Function(Box::new(Function::new(
32606 "DATEDIFF".to_string(),
32607 vec![unit, arg1, arg2],
32608 ))));
32609 }
32610
32611 if matches!(target, DialectType::DuckDB) {
32612 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
32613 let cast_d1 = Self::ensure_cast_timestamp(arg1);
32614 let cast_d2 = Self::ensure_cast_timestamp(arg2);
32615 return Ok(Expression::Function(Box::new(Function::new(
32616 "DATE_DIFF".to_string(),
32617 vec![
32618 Expression::Literal(Box::new(Literal::String(unit_str))),
32619 cast_d1,
32620 cast_d2,
32621 ],
32622 ))));
32623 }
32624
32625 if matches!(target, DialectType::BigQuery) {
32626 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
32627 let cast_d1 = Self::ensure_cast_datetime(arg1);
32628 let cast_d2 = Self::ensure_cast_datetime(arg2);
32629 let unit = Expression::Identifier(Identifier::new(unit_str));
32630 return Ok(Expression::Function(Box::new(Function::new(
32631 "DATE_DIFF".to_string(),
32632 vec![cast_d2, cast_d1, unit],
32633 ))));
32634 }
32635
32636 if matches!(target, DialectType::Spark | DialectType::Databricks) {
32637 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
32638 let unit = Expression::Identifier(Identifier::new(unit_str));
32639 return Ok(Expression::Function(Box::new(Function::new(
32640 "DATEDIFF".to_string(),
32641 vec![unit, arg1, arg2],
32642 ))));
32643 }
32644
32645 if matches!(target, DialectType::Hive) {
32646 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
32647 match unit_str.as_str() {
32648 "MONTH" => {
32649 return Ok(Expression::Function(Box::new(Function::new(
32650 "CAST".to_string(),
32651 vec![Expression::Function(Box::new(Function::new(
32652 "MONTHS_BETWEEN".to_string(),
32653 vec![arg2, arg1],
32654 )))],
32655 ))));
32656 }
32657 "WEEK" => {
32658 return Ok(Expression::Cast(Box::new(Cast {
32659 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
32660 Expression::Function(Box::new(Function::new(
32661 "DATEDIFF".to_string(),
32662 vec![arg2, arg1],
32663 ))),
32664 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
32665 ))),
32666 to: DataType::Int {
32667 length: None,
32668 integer_spelling: false,
32669 },
32670 trailing_comments: vec![],
32671 double_colon_syntax: false,
32672 format: None,
32673 default: None,
32674 inferred_type: None,
32675 })));
32676 }
32677 _ => {
32678 // Default: DATEDIFF(end, start) for DAY
32679 return Ok(Expression::Function(Box::new(Function::new(
32680 "DATEDIFF".to_string(),
32681 vec![arg2, arg1],
32682 ))));
32683 }
32684 }
32685 }
32686
32687 if matches!(
32688 target,
32689 DialectType::Presto | DialectType::Trino | DialectType::Athena
32690 ) {
32691 // Presto/Trino: DATE_DIFF('UNIT', start, end)
32692 return Ok(Expression::Function(Box::new(Function::new(
32693 "DATE_DIFF".to_string(),
32694 vec![
32695 Expression::Literal(Box::new(Literal::String(unit_str))),
32696 arg1,
32697 arg2,
32698 ],
32699 ))));
32700 }
32701
32702 if matches!(target, DialectType::TSQL) {
32703 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
32704 let cast_d2 = Self::ensure_cast_datetime2(arg2);
32705 let unit = Expression::Identifier(Identifier::new(unit_str));
32706 return Ok(Expression::Function(Box::new(Function::new(
32707 "DATEDIFF".to_string(),
32708 vec![unit, arg1, cast_d2],
32709 ))));
32710 }
32711
32712 if matches!(target, DialectType::PostgreSQL) {
32713 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
32714 // For now, use DATEDIFF (passthrough) with uppercased unit
32715 let unit = Expression::Identifier(Identifier::new(unit_str));
32716 return Ok(Expression::Function(Box::new(Function::new(
32717 "DATEDIFF".to_string(),
32718 vec![unit, arg1, arg2],
32719 ))));
32720 }
32721
32722 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
32723 let unit = Expression::Identifier(Identifier::new(unit_str));
32724 Ok(Expression::Function(Box::new(Function::new(
32725 "DATEDIFF".to_string(),
32726 vec![unit, arg1, arg2],
32727 ))))
32728 }
32729
32730 // DATE_DIFF(date1, date2, unit) -> standard form
32731 "DATE_DIFF" if args.len() == 3 => {
32732 let date1 = args.remove(0);
32733 let date2 = args.remove(0);
32734 let unit_expr = args.remove(0);
32735 let unit_str = get_unit_str(&unit_expr);
32736
32737 if matches!(target, DialectType::BigQuery) {
32738 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
32739 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
32740 "WEEK".to_string()
32741 } else {
32742 unit_str
32743 };
32744 let norm_d1 = Self::date_literal_to_cast(date1);
32745 let norm_d2 = Self::date_literal_to_cast(date2);
32746 let unit = Expression::Identifier(Identifier::new(norm_unit));
32747 return Ok(Expression::Function(Box::new(Function::new(
32748 f.name,
32749 vec![norm_d1, norm_d2, unit],
32750 ))));
32751 }
32752
32753 if matches!(target, DialectType::MySQL) {
32754 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
32755 let norm_d1 = Self::date_literal_to_cast(date1);
32756 let norm_d2 = Self::date_literal_to_cast(date2);
32757 return Ok(Expression::Function(Box::new(Function::new(
32758 "DATEDIFF".to_string(),
32759 vec![norm_d1, norm_d2],
32760 ))));
32761 }
32762
32763 if matches!(target, DialectType::StarRocks) {
32764 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
32765 let norm_d1 = Self::date_literal_to_cast(date1);
32766 let norm_d2 = Self::date_literal_to_cast(date2);
32767 return Ok(Expression::Function(Box::new(Function::new(
32768 "DATE_DIFF".to_string(),
32769 vec![
32770 Expression::Literal(Box::new(Literal::String(unit_str))),
32771 norm_d1,
32772 norm_d2,
32773 ],
32774 ))));
32775 }
32776
32777 if matches!(target, DialectType::DuckDB) {
32778 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
32779 let norm_d1 = Self::ensure_cast_date(date1);
32780 let norm_d2 = Self::ensure_cast_date(date2);
32781
32782 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
32783 let is_week_variant = unit_str == "WEEK"
32784 || unit_str.starts_with("WEEK(")
32785 || unit_str == "ISOWEEK";
32786 if is_week_variant {
32787 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
32788 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
32789 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
32790 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
32791 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
32792 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
32793 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
32794 Some("1") // Shift Sunday to Monday alignment
32795 } else if unit_str == "WEEK(SATURDAY)" {
32796 Some("-5")
32797 } else if unit_str == "WEEK(TUESDAY)" {
32798 Some("-1")
32799 } else if unit_str == "WEEK(WEDNESDAY)" {
32800 Some("-2")
32801 } else if unit_str == "WEEK(THURSDAY)" {
32802 Some("-3")
32803 } else if unit_str == "WEEK(FRIDAY)" {
32804 Some("-4")
32805 } else {
32806 Some("1") // default to Sunday
32807 };
32808
32809 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
32810 let shifted = if let Some(off) = offset {
32811 let interval =
32812 Expression::Interval(Box::new(crate::expressions::Interval {
32813 this: Some(Expression::Literal(Box::new(Literal::String(
32814 off.to_string(),
32815 )))),
32816 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32817 unit: crate::expressions::IntervalUnit::Day,
32818 use_plural: false,
32819 }),
32820 }));
32821 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
32822 date, interval,
32823 )))
32824 } else {
32825 date
32826 };
32827 Expression::Function(Box::new(Function::new(
32828 "DATE_TRUNC".to_string(),
32829 vec![
32830 Expression::Literal(Box::new(Literal::String(
32831 "WEEK".to_string(),
32832 ))),
32833 shifted,
32834 ],
32835 )))
32836 };
32837
32838 let trunc_d2 = make_trunc(norm_d2, day_offset);
32839 let trunc_d1 = make_trunc(norm_d1, day_offset);
32840 return Ok(Expression::Function(Box::new(Function::new(
32841 "DATE_DIFF".to_string(),
32842 vec![
32843 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
32844 trunc_d2,
32845 trunc_d1,
32846 ],
32847 ))));
32848 }
32849
32850 return Ok(Expression::Function(Box::new(Function::new(
32851 "DATE_DIFF".to_string(),
32852 vec![
32853 Expression::Literal(Box::new(Literal::String(unit_str))),
32854 norm_d2,
32855 norm_d1,
32856 ],
32857 ))));
32858 }
32859
32860 // Default: DATEDIFF(unit, date2, date1)
32861 let unit = Expression::Identifier(Identifier::new(unit_str));
32862 Ok(Expression::Function(Box::new(Function::new(
32863 "DATEDIFF".to_string(),
32864 vec![unit, date2, date1],
32865 ))))
32866 }
32867
32868 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
32869 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
32870 let ts = args.remove(0);
32871 let interval_expr = args.remove(0);
32872 let (val, unit) =
32873 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
32874 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
32875 });
32876
32877 match target {
32878 DialectType::Snowflake => {
32879 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
32880 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
32881 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
32882 let unit_str = Self::interval_unit_to_string(&unit);
32883 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
32884 Ok(Expression::TimestampAdd(Box::new(
32885 crate::expressions::TimestampAdd {
32886 this: Box::new(val),
32887 expression: Box::new(cast_ts),
32888 unit: Some(unit_str.to_string()),
32889 },
32890 )))
32891 }
32892 DialectType::Spark | DialectType::Databricks => {
32893 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
32894 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
32895 let interval =
32896 Expression::Interval(Box::new(crate::expressions::Interval {
32897 this: Some(val),
32898 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32899 unit,
32900 use_plural: false,
32901 }),
32902 }));
32903 Ok(Expression::Add(Box::new(
32904 crate::expressions::BinaryOp::new(ts, interval),
32905 )))
32906 } else if name == "DATETIME_ADD"
32907 && matches!(target, DialectType::Databricks)
32908 {
32909 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
32910 let unit_str = Self::interval_unit_to_string(&unit);
32911 Ok(Expression::Function(Box::new(Function::new(
32912 "TIMESTAMPADD".to_string(),
32913 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
32914 ))))
32915 } else {
32916 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
32917 let unit_str = Self::interval_unit_to_string(&unit);
32918 let cast_ts =
32919 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
32920 Self::maybe_cast_ts(ts)
32921 } else {
32922 ts
32923 };
32924 Ok(Expression::Function(Box::new(Function::new(
32925 "DATE_ADD".to_string(),
32926 vec![
32927 Expression::Identifier(Identifier::new(unit_str)),
32928 val,
32929 cast_ts,
32930 ],
32931 ))))
32932 }
32933 }
32934 DialectType::MySQL => {
32935 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
32936 let mysql_ts = if name.starts_with("TIMESTAMP") {
32937 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
32938 match &ts {
32939 Expression::Function(ref inner_f)
32940 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
32941 {
32942 // Already wrapped, keep as-is
32943 ts
32944 }
32945 _ => {
32946 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
32947 let unwrapped = match ts {
32948 Expression::Literal(lit)
32949 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
32950 {
32951 let Literal::Timestamp(s) = lit.as_ref() else {
32952 unreachable!()
32953 };
32954 Expression::Literal(Box::new(Literal::String(
32955 s.clone(),
32956 )))
32957 }
32958 other => other,
32959 };
32960 Expression::Function(Box::new(Function::new(
32961 "TIMESTAMP".to_string(),
32962 vec![unwrapped],
32963 )))
32964 }
32965 }
32966 } else {
32967 ts
32968 };
32969 Ok(Expression::DateAdd(Box::new(
32970 crate::expressions::DateAddFunc {
32971 this: mysql_ts,
32972 interval: val,
32973 unit,
32974 },
32975 )))
32976 }
32977 _ => {
32978 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
32979 let cast_ts = if matches!(target, DialectType::DuckDB) {
32980 if name == "DATETIME_ADD" {
32981 Self::ensure_cast_timestamp(ts)
32982 } else if name.starts_with("TIMESTAMP") {
32983 Self::maybe_cast_ts_to_tz(ts, &name)
32984 } else {
32985 ts
32986 }
32987 } else {
32988 ts
32989 };
32990 Ok(Expression::DateAdd(Box::new(
32991 crate::expressions::DateAddFunc {
32992 this: cast_ts,
32993 interval: val,
32994 unit,
32995 },
32996 )))
32997 }
32998 }
32999 }
33000
33001 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
33002 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
33003 let ts = args.remove(0);
33004 let interval_expr = args.remove(0);
33005 let (val, unit) =
33006 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
33007 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
33008 });
33009
33010 match target {
33011 DialectType::Snowflake => {
33012 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
33013 let unit_str = Self::interval_unit_to_string(&unit);
33014 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
33015 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
33016 val,
33017 Expression::Neg(Box::new(crate::expressions::UnaryOp {
33018 this: Expression::number(1),
33019 inferred_type: None,
33020 })),
33021 )));
33022 Ok(Expression::TimestampAdd(Box::new(
33023 crate::expressions::TimestampAdd {
33024 this: Box::new(neg_val),
33025 expression: Box::new(cast_ts),
33026 unit: Some(unit_str.to_string()),
33027 },
33028 )))
33029 }
33030 DialectType::Spark | DialectType::Databricks => {
33031 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
33032 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
33033 {
33034 // Spark: ts - INTERVAL val UNIT
33035 let cast_ts = if name.starts_with("TIMESTAMP") {
33036 Self::maybe_cast_ts(ts)
33037 } else {
33038 ts
33039 };
33040 let interval =
33041 Expression::Interval(Box::new(crate::expressions::Interval {
33042 this: Some(val),
33043 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33044 unit,
33045 use_plural: false,
33046 }),
33047 }));
33048 Ok(Expression::Sub(Box::new(
33049 crate::expressions::BinaryOp::new(cast_ts, interval),
33050 )))
33051 } else {
33052 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
33053 let unit_str = Self::interval_unit_to_string(&unit);
33054 let neg_val =
33055 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
33056 val,
33057 Expression::Neg(Box::new(crate::expressions::UnaryOp {
33058 this: Expression::number(1),
33059 inferred_type: None,
33060 })),
33061 )));
33062 Ok(Expression::Function(Box::new(Function::new(
33063 "TIMESTAMPADD".to_string(),
33064 vec![
33065 Expression::Identifier(Identifier::new(unit_str)),
33066 neg_val,
33067 ts,
33068 ],
33069 ))))
33070 }
33071 }
33072 DialectType::MySQL => {
33073 let mysql_ts = if name.starts_with("TIMESTAMP") {
33074 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
33075 match &ts {
33076 Expression::Function(ref inner_f)
33077 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
33078 {
33079 // Already wrapped, keep as-is
33080 ts
33081 }
33082 _ => {
33083 let unwrapped = match ts {
33084 Expression::Literal(lit)
33085 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
33086 {
33087 let Literal::Timestamp(s) = lit.as_ref() else {
33088 unreachable!()
33089 };
33090 Expression::Literal(Box::new(Literal::String(
33091 s.clone(),
33092 )))
33093 }
33094 other => other,
33095 };
33096 Expression::Function(Box::new(Function::new(
33097 "TIMESTAMP".to_string(),
33098 vec![unwrapped],
33099 )))
33100 }
33101 }
33102 } else {
33103 ts
33104 };
33105 Ok(Expression::DateSub(Box::new(
33106 crate::expressions::DateAddFunc {
33107 this: mysql_ts,
33108 interval: val,
33109 unit,
33110 },
33111 )))
33112 }
33113 _ => {
33114 let cast_ts = if matches!(target, DialectType::DuckDB) {
33115 if name == "DATETIME_SUB" {
33116 Self::ensure_cast_timestamp(ts)
33117 } else if name.starts_with("TIMESTAMP") {
33118 Self::maybe_cast_ts_to_tz(ts, &name)
33119 } else {
33120 ts
33121 }
33122 } else {
33123 ts
33124 };
33125 Ok(Expression::DateSub(Box::new(
33126 crate::expressions::DateAddFunc {
33127 this: cast_ts,
33128 interval: val,
33129 unit,
33130 },
33131 )))
33132 }
33133 }
33134 }
33135
33136 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
33137 "DATE_SUB" if args.len() == 2 => {
33138 let date = args.remove(0);
33139 let interval_expr = args.remove(0);
33140 let (val, unit) =
33141 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
33142 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
33143 });
33144
33145 match target {
33146 DialectType::Databricks | DialectType::Spark => {
33147 // Databricks/Spark: DATE_ADD(date, -val)
33148 // Use DateAdd expression with negative val so it generates correctly
33149 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
33150 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
33151 // Instead, we directly output as a simple negated DateSub
33152 Ok(Expression::DateSub(Box::new(
33153 crate::expressions::DateAddFunc {
33154 this: date,
33155 interval: val,
33156 unit,
33157 },
33158 )))
33159 }
33160 DialectType::DuckDB => {
33161 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
33162 let cast_date = Self::ensure_cast_date(date);
33163 let interval =
33164 Expression::Interval(Box::new(crate::expressions::Interval {
33165 this: Some(val),
33166 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33167 unit,
33168 use_plural: false,
33169 }),
33170 }));
33171 Ok(Expression::Sub(Box::new(
33172 crate::expressions::BinaryOp::new(cast_date, interval),
33173 )))
33174 }
33175 DialectType::Snowflake => {
33176 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
33177 // Just ensure the date is cast properly
33178 let cast_date = Self::ensure_cast_date(date);
33179 Ok(Expression::DateSub(Box::new(
33180 crate::expressions::DateAddFunc {
33181 this: cast_date,
33182 interval: val,
33183 unit,
33184 },
33185 )))
33186 }
33187 DialectType::PostgreSQL => {
33188 // PostgreSQL: date - INTERVAL 'val UNIT'
33189 let unit_str = Self::interval_unit_to_string(&unit);
33190 let interval =
33191 Expression::Interval(Box::new(crate::expressions::Interval {
33192 this: Some(Expression::Literal(Box::new(Literal::String(
33193 format!("{} {}", Self::expr_to_string(&val), unit_str),
33194 )))),
33195 unit: None,
33196 }));
33197 Ok(Expression::Sub(Box::new(
33198 crate::expressions::BinaryOp::new(date, interval),
33199 )))
33200 }
33201 _ => Ok(Expression::DateSub(Box::new(
33202 crate::expressions::DateAddFunc {
33203 this: date,
33204 interval: val,
33205 unit,
33206 },
33207 ))),
33208 }
33209 }
33210
33211 // DATEADD(unit, val, date) -> target-specific form
33212 // Used by: Redshift, Snowflake, TSQL, ClickHouse
33213 "DATEADD" if args.len() == 3 => {
33214 let arg0 = args.remove(0);
33215 let arg1 = args.remove(0);
33216 let arg2 = args.remove(0);
33217 let unit_str = get_unit_str(&arg0);
33218
33219 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
33220 // Keep DATEADD(UNIT, val, date) with uppercased unit
33221 let unit = Expression::Identifier(Identifier::new(unit_str));
33222 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
33223 let date = if matches!(target, DialectType::TSQL)
33224 && !matches!(
33225 source,
33226 DialectType::Spark | DialectType::Databricks | DialectType::Hive
33227 ) {
33228 Self::ensure_cast_datetime2(arg2)
33229 } else {
33230 arg2
33231 };
33232 return Ok(Expression::Function(Box::new(Function::new(
33233 "DATEADD".to_string(),
33234 vec![unit, arg1, date],
33235 ))));
33236 }
33237
33238 if matches!(target, DialectType::DuckDB) {
33239 // DuckDB: date + INTERVAL 'val' UNIT
33240 let iu = parse_interval_unit(&unit_str);
33241 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33242 this: Some(arg1),
33243 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33244 unit: iu,
33245 use_plural: false,
33246 }),
33247 }));
33248 let cast_date = Self::ensure_cast_timestamp(arg2);
33249 return Ok(Expression::Add(Box::new(
33250 crate::expressions::BinaryOp::new(cast_date, interval),
33251 )));
33252 }
33253
33254 if matches!(target, DialectType::BigQuery) {
33255 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
33256 let iu = parse_interval_unit(&unit_str);
33257 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33258 this: Some(arg1),
33259 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33260 unit: iu,
33261 use_plural: false,
33262 }),
33263 }));
33264 return Ok(Expression::Function(Box::new(Function::new(
33265 "DATE_ADD".to_string(),
33266 vec![arg2, interval],
33267 ))));
33268 }
33269
33270 if matches!(target, DialectType::Databricks) {
33271 // Databricks: keep DATEADD(UNIT, val, date) format
33272 let unit = Expression::Identifier(Identifier::new(unit_str));
33273 return Ok(Expression::Function(Box::new(Function::new(
33274 "DATEADD".to_string(),
33275 vec![unit, arg1, arg2],
33276 ))));
33277 }
33278
33279 if matches!(target, DialectType::Spark) {
33280 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
33281 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
33282 if let Expression::Literal(lit) = &expr {
33283 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
33284 if let Ok(val) = n.parse::<i64>() {
33285 return Expression::Literal(Box::new(
33286 crate::expressions::Literal::Number(
33287 (val * factor).to_string(),
33288 ),
33289 ));
33290 }
33291 }
33292 }
33293 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
33294 expr,
33295 Expression::Literal(Box::new(crate::expressions::Literal::Number(
33296 factor.to_string(),
33297 ))),
33298 )))
33299 }
33300 match unit_str.as_str() {
33301 "YEAR" => {
33302 let months = multiply_expr_dateadd(arg1, 12);
33303 return Ok(Expression::Function(Box::new(Function::new(
33304 "ADD_MONTHS".to_string(),
33305 vec![arg2, months],
33306 ))));
33307 }
33308 "QUARTER" => {
33309 let months = multiply_expr_dateadd(arg1, 3);
33310 return Ok(Expression::Function(Box::new(Function::new(
33311 "ADD_MONTHS".to_string(),
33312 vec![arg2, months],
33313 ))));
33314 }
33315 "MONTH" => {
33316 return Ok(Expression::Function(Box::new(Function::new(
33317 "ADD_MONTHS".to_string(),
33318 vec![arg2, arg1],
33319 ))));
33320 }
33321 "WEEK" => {
33322 let days = multiply_expr_dateadd(arg1, 7);
33323 return Ok(Expression::Function(Box::new(Function::new(
33324 "DATE_ADD".to_string(),
33325 vec![arg2, days],
33326 ))));
33327 }
33328 "DAY" => {
33329 return Ok(Expression::Function(Box::new(Function::new(
33330 "DATE_ADD".to_string(),
33331 vec![arg2, arg1],
33332 ))));
33333 }
33334 _ => {
33335 let unit = Expression::Identifier(Identifier::new(unit_str));
33336 return Ok(Expression::Function(Box::new(Function::new(
33337 "DATE_ADD".to_string(),
33338 vec![unit, arg1, arg2],
33339 ))));
33340 }
33341 }
33342 }
33343
33344 if matches!(target, DialectType::Hive) {
33345 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
33346 match unit_str.as_str() {
33347 "DAY" => {
33348 return Ok(Expression::Function(Box::new(Function::new(
33349 "DATE_ADD".to_string(),
33350 vec![arg2, arg1],
33351 ))));
33352 }
33353 "MONTH" => {
33354 return Ok(Expression::Function(Box::new(Function::new(
33355 "ADD_MONTHS".to_string(),
33356 vec![arg2, arg1],
33357 ))));
33358 }
33359 _ => {
33360 let iu = parse_interval_unit(&unit_str);
33361 let interval =
33362 Expression::Interval(Box::new(crate::expressions::Interval {
33363 this: Some(arg1),
33364 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33365 unit: iu,
33366 use_plural: false,
33367 }),
33368 }));
33369 return Ok(Expression::Add(Box::new(
33370 crate::expressions::BinaryOp::new(arg2, interval),
33371 )));
33372 }
33373 }
33374 }
33375
33376 if matches!(target, DialectType::PostgreSQL) {
33377 // PostgreSQL: date + INTERVAL 'val UNIT'
33378 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33379 this: Some(Expression::Literal(Box::new(Literal::String(format!(
33380 "{} {}",
33381 Self::expr_to_string(&arg1),
33382 unit_str
33383 ))))),
33384 unit: None,
33385 }));
33386 return Ok(Expression::Add(Box::new(
33387 crate::expressions::BinaryOp::new(arg2, interval),
33388 )));
33389 }
33390
33391 if matches!(
33392 target,
33393 DialectType::Presto | DialectType::Trino | DialectType::Athena
33394 ) {
33395 // Presto/Trino: DATE_ADD('UNIT', val, date)
33396 return Ok(Expression::Function(Box::new(Function::new(
33397 "DATE_ADD".to_string(),
33398 vec![
33399 Expression::Literal(Box::new(Literal::String(unit_str))),
33400 arg1,
33401 arg2,
33402 ],
33403 ))));
33404 }
33405
33406 if matches!(target, DialectType::ClickHouse) {
33407 // ClickHouse: DATE_ADD(UNIT, val, date)
33408 let unit = Expression::Identifier(Identifier::new(unit_str));
33409 return Ok(Expression::Function(Box::new(Function::new(
33410 "DATE_ADD".to_string(),
33411 vec![unit, arg1, arg2],
33412 ))));
33413 }
33414
33415 // Default: keep DATEADD with uppercased unit
33416 let unit = Expression::Identifier(Identifier::new(unit_str));
33417 Ok(Expression::Function(Box::new(Function::new(
33418 "DATEADD".to_string(),
33419 vec![unit, arg1, arg2],
33420 ))))
33421 }
33422
33423 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
33424 "DATE_ADD" if args.len() == 3 => {
33425 let arg0 = args.remove(0);
33426 let arg1 = args.remove(0);
33427 let arg2 = args.remove(0);
33428 let unit_str = get_unit_str(&arg0);
33429
33430 if matches!(
33431 target,
33432 DialectType::Presto | DialectType::Trino | DialectType::Athena
33433 ) {
33434 // Presto/Trino: DATE_ADD('UNIT', val, date)
33435 return Ok(Expression::Function(Box::new(Function::new(
33436 "DATE_ADD".to_string(),
33437 vec![
33438 Expression::Literal(Box::new(Literal::String(unit_str))),
33439 arg1,
33440 arg2,
33441 ],
33442 ))));
33443 }
33444
33445 if matches!(
33446 target,
33447 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
33448 ) {
33449 // DATEADD(UNIT, val, date)
33450 let unit = Expression::Identifier(Identifier::new(unit_str));
33451 let date = if matches!(target, DialectType::TSQL) {
33452 Self::ensure_cast_datetime2(arg2)
33453 } else {
33454 arg2
33455 };
33456 return Ok(Expression::Function(Box::new(Function::new(
33457 "DATEADD".to_string(),
33458 vec![unit, arg1, date],
33459 ))));
33460 }
33461
33462 if matches!(target, DialectType::DuckDB) {
33463 // DuckDB: date + INTERVAL val UNIT
33464 let iu = parse_interval_unit(&unit_str);
33465 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33466 this: Some(arg1),
33467 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33468 unit: iu,
33469 use_plural: false,
33470 }),
33471 }));
33472 return Ok(Expression::Add(Box::new(
33473 crate::expressions::BinaryOp::new(arg2, interval),
33474 )));
33475 }
33476
33477 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33478 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
33479 let unit = Expression::Identifier(Identifier::new(unit_str));
33480 return Ok(Expression::Function(Box::new(Function::new(
33481 "DATE_ADD".to_string(),
33482 vec![unit, arg1, arg2],
33483 ))));
33484 }
33485
33486 // Default: DATE_ADD(UNIT, val, date)
33487 let unit = Expression::Identifier(Identifier::new(unit_str));
33488 Ok(Expression::Function(Box::new(Function::new(
33489 "DATE_ADD".to_string(),
33490 vec![unit, arg1, arg2],
33491 ))))
33492 }
33493
33494 // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
33495 "DATE_ADD" if args.len() == 2 => {
33496 let date = args.remove(0);
33497 let interval_expr = args.remove(0);
33498 let (val, unit) =
33499 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
33500 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
33501 });
33502 let unit_str = Self::interval_unit_to_string(&unit);
33503
33504 match target {
33505 DialectType::DuckDB => {
33506 // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
33507 let cast_date = Self::ensure_cast_date(date);
33508 let quoted_val = Self::quote_interval_val(&val);
33509 let interval =
33510 Expression::Interval(Box::new(crate::expressions::Interval {
33511 this: Some(quoted_val),
33512 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33513 unit,
33514 use_plural: false,
33515 }),
33516 }));
33517 Ok(Expression::Add(Box::new(
33518 crate::expressions::BinaryOp::new(cast_date, interval),
33519 )))
33520 }
33521 DialectType::PostgreSQL => {
33522 // PostgreSQL: date + INTERVAL 'val UNIT'
33523 let interval =
33524 Expression::Interval(Box::new(crate::expressions::Interval {
33525 this: Some(Expression::Literal(Box::new(Literal::String(
33526 format!("{} {}", Self::expr_to_string(&val), unit_str),
33527 )))),
33528 unit: None,
33529 }));
33530 Ok(Expression::Add(Box::new(
33531 crate::expressions::BinaryOp::new(date, interval),
33532 )))
33533 }
33534 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
33535 // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
33536 let val_str = Self::expr_to_string(&val);
33537 Ok(Expression::Function(Box::new(Function::new(
33538 "DATE_ADD".to_string(),
33539 vec![
33540 Expression::Literal(Box::new(Literal::String(
33541 unit_str.to_string(),
33542 ))),
33543 Expression::Cast(Box::new(Cast {
33544 this: Expression::Literal(Box::new(Literal::String(val_str))),
33545 to: DataType::BigInt { length: None },
33546 trailing_comments: vec![],
33547 double_colon_syntax: false,
33548 format: None,
33549 default: None,
33550 inferred_type: None,
33551 })),
33552 date,
33553 ],
33554 ))))
33555 }
33556 DialectType::Spark | DialectType::Hive => {
33557 // Spark/Hive: DATE_ADD(date, val) for DAY
33558 match unit_str {
33559 "DAY" => Ok(Expression::Function(Box::new(Function::new(
33560 "DATE_ADD".to_string(),
33561 vec![date, val],
33562 )))),
33563 "MONTH" => Ok(Expression::Function(Box::new(Function::new(
33564 "ADD_MONTHS".to_string(),
33565 vec![date, val],
33566 )))),
33567 _ => {
33568 let iu = parse_interval_unit(&unit_str);
33569 let interval =
33570 Expression::Interval(Box::new(crate::expressions::Interval {
33571 this: Some(val),
33572 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33573 unit: iu,
33574 use_plural: false,
33575 }),
33576 }));
33577 Ok(Expression::Function(Box::new(Function::new(
33578 "DATE_ADD".to_string(),
33579 vec![date, interval],
33580 ))))
33581 }
33582 }
33583 }
33584 DialectType::Snowflake => {
33585 // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
33586 let cast_date = Self::ensure_cast_date(date);
33587 let val_str = Self::expr_to_string(&val);
33588 Ok(Expression::Function(Box::new(Function::new(
33589 "DATEADD".to_string(),
33590 vec![
33591 Expression::Identifier(Identifier::new(unit_str)),
33592 Expression::Literal(Box::new(Literal::String(val_str))),
33593 cast_date,
33594 ],
33595 ))))
33596 }
33597 DialectType::TSQL | DialectType::Fabric => {
33598 let cast_date = Self::ensure_cast_datetime2(date);
33599 Ok(Expression::Function(Box::new(Function::new(
33600 "DATEADD".to_string(),
33601 vec![
33602 Expression::Identifier(Identifier::new(unit_str)),
33603 val,
33604 cast_date,
33605 ],
33606 ))))
33607 }
33608 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
33609 "DATEADD".to_string(),
33610 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
33611 )))),
33612 DialectType::MySQL => {
33613 // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
33614 let quoted_val = Self::quote_interval_val(&val);
33615 let iu = parse_interval_unit(&unit_str);
33616 let interval =
33617 Expression::Interval(Box::new(crate::expressions::Interval {
33618 this: Some(quoted_val),
33619 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33620 unit: iu,
33621 use_plural: false,
33622 }),
33623 }));
33624 Ok(Expression::Function(Box::new(Function::new(
33625 "DATE_ADD".to_string(),
33626 vec![date, interval],
33627 ))))
33628 }
33629 DialectType::BigQuery => {
33630 // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
33631 let quoted_val = Self::quote_interval_val(&val);
33632 let iu = parse_interval_unit(&unit_str);
33633 let interval =
33634 Expression::Interval(Box::new(crate::expressions::Interval {
33635 this: Some(quoted_val),
33636 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33637 unit: iu,
33638 use_plural: false,
33639 }),
33640 }));
33641 Ok(Expression::Function(Box::new(Function::new(
33642 "DATE_ADD".to_string(),
33643 vec![date, interval],
33644 ))))
33645 }
33646 DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
33647 "DATEADD".to_string(),
33648 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
33649 )))),
33650 _ => {
33651 // Default: keep as DATE_ADD with decomposed interval
33652 Ok(Expression::DateAdd(Box::new(
33653 crate::expressions::DateAddFunc {
33654 this: date,
33655 interval: val,
33656 unit,
33657 },
33658 )))
33659 }
33660 }
33661 }
33662
33663 // ADD_MONTHS(date, val) -> target-specific form
33664 "ADD_MONTHS" if args.len() == 2 => {
33665 let date = args.remove(0);
33666 let val = args.remove(0);
33667
33668 if matches!(target, DialectType::TSQL) {
33669 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
33670 let cast_date = Self::ensure_cast_datetime2(date);
33671 return Ok(Expression::Function(Box::new(Function::new(
33672 "DATEADD".to_string(),
33673 vec![
33674 Expression::Identifier(Identifier::new("MONTH")),
33675 val,
33676 cast_date,
33677 ],
33678 ))));
33679 }
33680
33681 if matches!(target, DialectType::DuckDB) {
33682 // DuckDB: date + INTERVAL val MONTH
33683 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33684 this: Some(val),
33685 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33686 unit: crate::expressions::IntervalUnit::Month,
33687 use_plural: false,
33688 }),
33689 }));
33690 return Ok(Expression::Add(Box::new(
33691 crate::expressions::BinaryOp::new(date, interval),
33692 )));
33693 }
33694
33695 if matches!(target, DialectType::Snowflake) {
33696 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
33697 if matches!(source, DialectType::Snowflake) {
33698 return Ok(Expression::Function(Box::new(Function::new(
33699 "ADD_MONTHS".to_string(),
33700 vec![date, val],
33701 ))));
33702 }
33703 return Ok(Expression::Function(Box::new(Function::new(
33704 "DATEADD".to_string(),
33705 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
33706 ))));
33707 }
33708
33709 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33710 // Spark: ADD_MONTHS(date, val) - keep as is
33711 return Ok(Expression::Function(Box::new(Function::new(
33712 "ADD_MONTHS".to_string(),
33713 vec![date, val],
33714 ))));
33715 }
33716
33717 if matches!(target, DialectType::Hive) {
33718 return Ok(Expression::Function(Box::new(Function::new(
33719 "ADD_MONTHS".to_string(),
33720 vec![date, val],
33721 ))));
33722 }
33723
33724 if matches!(
33725 target,
33726 DialectType::Presto | DialectType::Trino | DialectType::Athena
33727 ) {
33728 // Presto: DATE_ADD('MONTH', val, date)
33729 return Ok(Expression::Function(Box::new(Function::new(
33730 "DATE_ADD".to_string(),
33731 vec![
33732 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
33733 val,
33734 date,
33735 ],
33736 ))));
33737 }
33738
33739 // Default: keep ADD_MONTHS
33740 Ok(Expression::Function(Box::new(Function::new(
33741 "ADD_MONTHS".to_string(),
33742 vec![date, val],
33743 ))))
33744 }
33745
33746 // SAFE_DIVIDE(x, y) -> target-specific form directly
33747 "SAFE_DIVIDE" if args.len() == 2 => {
33748 let x = args.remove(0);
33749 let y = args.remove(0);
33750 // Wrap x and y in parens if they're complex expressions
33751 let y_ref = match &y {
33752 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
33753 y.clone()
33754 }
33755 _ => Expression::Paren(Box::new(Paren {
33756 this: y.clone(),
33757 trailing_comments: vec![],
33758 })),
33759 };
33760 let x_ref = match &x {
33761 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
33762 x.clone()
33763 }
33764 _ => Expression::Paren(Box::new(Paren {
33765 this: x.clone(),
33766 trailing_comments: vec![],
33767 })),
33768 };
33769 let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
33770 y_ref.clone(),
33771 Expression::number(0),
33772 )));
33773 let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
33774 x_ref.clone(),
33775 y_ref.clone(),
33776 )));
33777
33778 match target {
33779 DialectType::Spark | DialectType::Databricks => Ok(Expression::Function(
33780 Box::new(Function::new("TRY_DIVIDE".to_string(), vec![x, y])),
33781 )),
33782 DialectType::DuckDB | DialectType::PostgreSQL => {
33783 // CASE WHEN y <> 0 THEN x / y ELSE NULL END
33784 let result_div = if matches!(target, DialectType::PostgreSQL) {
33785 let cast_x = Expression::Cast(Box::new(Cast {
33786 this: x_ref,
33787 to: DataType::Custom {
33788 name: "DOUBLE PRECISION".to_string(),
33789 },
33790 trailing_comments: vec![],
33791 double_colon_syntax: false,
33792 format: None,
33793 default: None,
33794 inferred_type: None,
33795 }));
33796 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
33797 cast_x, y_ref,
33798 )))
33799 } else {
33800 div_expr
33801 };
33802 Ok(Expression::Case(Box::new(crate::expressions::Case {
33803 operand: None,
33804 whens: vec![(condition, result_div)],
33805 else_: Some(Expression::Null(crate::expressions::Null)),
33806 comments: Vec::new(),
33807 inferred_type: None,
33808 })))
33809 }
33810 DialectType::Snowflake => {
33811 // IFF(y <> 0, x / y, NULL)
33812 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33813 condition,
33814 true_value: div_expr,
33815 false_value: Some(Expression::Null(crate::expressions::Null)),
33816 original_name: Some("IFF".to_string()),
33817 inferred_type: None,
33818 })))
33819 }
33820 DialectType::Presto | DialectType::Trino => {
33821 // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
33822 let cast_x = Expression::Cast(Box::new(Cast {
33823 this: x_ref,
33824 to: DataType::Double {
33825 precision: None,
33826 scale: None,
33827 },
33828 trailing_comments: vec![],
33829 double_colon_syntax: false,
33830 format: None,
33831 default: None,
33832 inferred_type: None,
33833 }));
33834 let cast_div = Expression::Div(Box::new(
33835 crate::expressions::BinaryOp::new(cast_x, y_ref),
33836 ));
33837 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33838 condition,
33839 true_value: cast_div,
33840 false_value: Some(Expression::Null(crate::expressions::Null)),
33841 original_name: None,
33842 inferred_type: None,
33843 })))
33844 }
33845 _ => {
33846 // IF(y <> 0, x / y, NULL)
33847 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33848 condition,
33849 true_value: div_expr,
33850 false_value: Some(Expression::Null(crate::expressions::Null)),
33851 original_name: None,
33852 inferred_type: None,
33853 })))
33854 }
33855 }
33856 }
33857
33858 // GENERATE_UUID() -> UUID() with CAST to string
33859 "GENERATE_UUID" => {
33860 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
33861 this: None,
33862 name: None,
33863 is_string: None,
33864 }));
33865 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
33866 let cast_type = match target {
33867 DialectType::DuckDB => Some(DataType::Text),
33868 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
33869 length: None,
33870 parenthesized_length: false,
33871 }),
33872 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33873 Some(DataType::String { length: None })
33874 }
33875 _ => None,
33876 };
33877 if let Some(dt) = cast_type {
33878 Ok(Expression::Cast(Box::new(Cast {
33879 this: uuid_expr,
33880 to: dt,
33881 trailing_comments: vec![],
33882 double_colon_syntax: false,
33883 format: None,
33884 default: None,
33885 inferred_type: None,
33886 })))
33887 } else {
33888 Ok(uuid_expr)
33889 }
33890 }
33891
33892 // COUNTIF(x) -> CountIf expression
33893 "COUNTIF" if args.len() == 1 => {
33894 let arg = args.remove(0);
33895 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
33896 this: arg,
33897 distinct: false,
33898 filter: None,
33899 order_by: vec![],
33900 name: None,
33901 ignore_nulls: None,
33902 having_max: None,
33903 limit: None,
33904 inferred_type: None,
33905 })))
33906 }
33907
33908 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
33909 "EDIT_DISTANCE" => {
33910 // Strip named arguments (max_distance => N) and pass as positional
33911 let mut positional_args: Vec<Expression> = vec![];
33912 for arg in args {
33913 match arg {
33914 Expression::NamedArgument(na) => {
33915 positional_args.push(na.value);
33916 }
33917 other => positional_args.push(other),
33918 }
33919 }
33920 if positional_args.len() >= 2 {
33921 let col1 = positional_args.remove(0);
33922 let col2 = positional_args.remove(0);
33923 let levenshtein = crate::expressions::BinaryFunc {
33924 this: col1,
33925 expression: col2,
33926 original_name: None,
33927 inferred_type: None,
33928 };
33929 // Pass extra args through a function wrapper with all args
33930 if !positional_args.is_empty() {
33931 let max_dist = positional_args.remove(0);
33932 // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
33933 if matches!(target, DialectType::DuckDB) {
33934 let lev = Expression::Function(Box::new(Function::new(
33935 "LEVENSHTEIN".to_string(),
33936 vec![levenshtein.this, levenshtein.expression],
33937 )));
33938 let lev_is_null =
33939 Expression::IsNull(Box::new(crate::expressions::IsNull {
33940 this: lev.clone(),
33941 not: false,
33942 postfix_form: false,
33943 }));
33944 let max_is_null =
33945 Expression::IsNull(Box::new(crate::expressions::IsNull {
33946 this: max_dist.clone(),
33947 not: false,
33948 postfix_form: false,
33949 }));
33950 let null_check =
33951 Expression::Or(Box::new(crate::expressions::BinaryOp {
33952 left: lev_is_null,
33953 right: max_is_null,
33954 left_comments: Vec::new(),
33955 operator_comments: Vec::new(),
33956 trailing_comments: Vec::new(),
33957 inferred_type: None,
33958 }));
33959 let least =
33960 Expression::Least(Box::new(crate::expressions::VarArgFunc {
33961 expressions: vec![lev, max_dist],
33962 original_name: None,
33963 inferred_type: None,
33964 }));
33965 return Ok(Expression::Case(Box::new(crate::expressions::Case {
33966 operand: None,
33967 whens: vec![(
33968 null_check,
33969 Expression::Null(crate::expressions::Null),
33970 )],
33971 else_: Some(least),
33972 comments: Vec::new(),
33973 inferred_type: None,
33974 })));
33975 }
33976 let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
33977 all_args.extend(positional_args);
33978 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
33979 let func_name = if matches!(target, DialectType::PostgreSQL) {
33980 "LEVENSHTEIN_LESS_EQUAL"
33981 } else {
33982 "LEVENSHTEIN"
33983 };
33984 return Ok(Expression::Function(Box::new(Function::new(
33985 func_name.to_string(),
33986 all_args,
33987 ))));
33988 }
33989 Ok(Expression::Levenshtein(Box::new(levenshtein)))
33990 } else {
33991 Ok(Expression::Function(Box::new(Function::new(
33992 "EDIT_DISTANCE".to_string(),
33993 positional_args,
33994 ))))
33995 }
33996 }
33997
33998 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
33999 "TIMESTAMP_SECONDS" if args.len() == 1 => {
34000 let arg = args.remove(0);
34001 Ok(Expression::UnixToTime(Box::new(
34002 crate::expressions::UnixToTime {
34003 this: Box::new(arg),
34004 scale: Some(0),
34005 zone: None,
34006 hours: None,
34007 minutes: None,
34008 format: None,
34009 target_type: None,
34010 },
34011 )))
34012 }
34013
34014 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
34015 "TIMESTAMP_MILLIS" if args.len() == 1 => {
34016 let arg = args.remove(0);
34017 Ok(Expression::UnixToTime(Box::new(
34018 crate::expressions::UnixToTime {
34019 this: Box::new(arg),
34020 scale: Some(3),
34021 zone: None,
34022 hours: None,
34023 minutes: None,
34024 format: None,
34025 target_type: None,
34026 },
34027 )))
34028 }
34029
34030 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
34031 "TIMESTAMP_MICROS" if args.len() == 1 => {
34032 let arg = args.remove(0);
34033 Ok(Expression::UnixToTime(Box::new(
34034 crate::expressions::UnixToTime {
34035 this: Box::new(arg),
34036 scale: Some(6),
34037 zone: None,
34038 hours: None,
34039 minutes: None,
34040 format: None,
34041 target_type: None,
34042 },
34043 )))
34044 }
34045
34046 // DIV(x, y) -> IntDiv expression
34047 "DIV" if args.len() == 2 => {
34048 let x = args.remove(0);
34049 let y = args.remove(0);
34050 Ok(Expression::IntDiv(Box::new(
34051 crate::expressions::BinaryFunc {
34052 this: x,
34053 expression: y,
34054 original_name: None,
34055 inferred_type: None,
34056 },
34057 )))
34058 }
34059
34060 // TO_HEX(x) -> target-specific form
34061 "TO_HEX" if args.len() == 1 => {
34062 let arg = args.remove(0);
34063 // Check if inner function already returns hex string in certain targets
34064 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
34065 if matches!(target, DialectType::BigQuery) {
34066 // BQ->BQ: keep as TO_HEX
34067 Ok(Expression::Function(Box::new(Function::new(
34068 "TO_HEX".to_string(),
34069 vec![arg],
34070 ))))
34071 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
34072 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
34073 Ok(arg)
34074 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
34075 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
34076 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
34077 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
34078 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
34079 if let Expression::Function(ref inner_f) = arg {
34080 let inner_args = inner_f.args.clone();
34081 let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
34082 "SHA1" => Expression::Function(Box::new(Function::new(
34083 "SHA1_BINARY".to_string(),
34084 inner_args,
34085 ))),
34086 "MD5" => Expression::Function(Box::new(Function::new(
34087 "MD5_BINARY".to_string(),
34088 inner_args,
34089 ))),
34090 "SHA256" => {
34091 let mut a = inner_args;
34092 a.push(Expression::number(256));
34093 Expression::Function(Box::new(Function::new(
34094 "SHA2_BINARY".to_string(),
34095 a,
34096 )))
34097 }
34098 "SHA512" => {
34099 let mut a = inner_args;
34100 a.push(Expression::number(512));
34101 Expression::Function(Box::new(Function::new(
34102 "SHA2_BINARY".to_string(),
34103 a,
34104 )))
34105 }
34106 _ => arg.clone(),
34107 };
34108 Ok(Expression::Function(Box::new(Function::new(
34109 "TO_CHAR".to_string(),
34110 vec![binary_func],
34111 ))))
34112 } else {
34113 let inner = Expression::Function(Box::new(Function::new(
34114 "HEX".to_string(),
34115 vec![arg],
34116 )));
34117 Ok(Expression::Lower(Box::new(
34118 crate::expressions::UnaryFunc::new(inner),
34119 )))
34120 }
34121 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
34122 let inner = Expression::Function(Box::new(Function::new(
34123 "TO_HEX".to_string(),
34124 vec![arg],
34125 )));
34126 Ok(Expression::Lower(Box::new(
34127 crate::expressions::UnaryFunc::new(inner),
34128 )))
34129 } else {
34130 let inner =
34131 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
34132 Ok(Expression::Lower(Box::new(
34133 crate::expressions::UnaryFunc::new(inner),
34134 )))
34135 }
34136 }
34137
34138 // LAST_DAY(date, unit) -> strip unit for most targets, or transform for PostgreSQL
34139 "LAST_DAY" if args.len() == 2 => {
34140 let date = args.remove(0);
34141 let _unit = args.remove(0); // Strip the unit (MONTH is default)
34142 Ok(Expression::Function(Box::new(Function::new(
34143 "LAST_DAY".to_string(),
34144 vec![date],
34145 ))))
34146 }
34147
34148 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
34149 "GENERATE_ARRAY" => {
34150 let start = args.get(0).cloned();
34151 let end = args.get(1).cloned();
34152 let step = args.get(2).cloned();
34153 Ok(Expression::GenerateSeries(Box::new(
34154 crate::expressions::GenerateSeries {
34155 start: start.map(Box::new),
34156 end: end.map(Box::new),
34157 step: step.map(Box::new),
34158 is_end_exclusive: None,
34159 },
34160 )))
34161 }
34162
34163 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
34164 "GENERATE_TIMESTAMP_ARRAY" => {
34165 let start = args.get(0).cloned();
34166 let end = args.get(1).cloned();
34167 let step = args.get(2).cloned();
34168
34169 if matches!(target, DialectType::DuckDB) {
34170 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
34171 // Only cast string literals - leave columns/expressions as-is
34172 let maybe_cast_ts = |expr: Expression| -> Expression {
34173 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
34174 {
34175 Expression::Cast(Box::new(Cast {
34176 this: expr,
34177 to: DataType::Timestamp {
34178 precision: None,
34179 timezone: false,
34180 },
34181 trailing_comments: vec![],
34182 double_colon_syntax: false,
34183 format: None,
34184 default: None,
34185 inferred_type: None,
34186 }))
34187 } else {
34188 expr
34189 }
34190 };
34191 let cast_start = start.map(maybe_cast_ts);
34192 let cast_end = end.map(maybe_cast_ts);
34193 Ok(Expression::GenerateSeries(Box::new(
34194 crate::expressions::GenerateSeries {
34195 start: cast_start.map(Box::new),
34196 end: cast_end.map(Box::new),
34197 step: step.map(Box::new),
34198 is_end_exclusive: None,
34199 },
34200 )))
34201 } else {
34202 Ok(Expression::GenerateSeries(Box::new(
34203 crate::expressions::GenerateSeries {
34204 start: start.map(Box::new),
34205 end: end.map(Box::new),
34206 step: step.map(Box::new),
34207 is_end_exclusive: None,
34208 },
34209 )))
34210 }
34211 }
34212
34213 // TO_JSON(x) -> target-specific (from Spark/Hive)
34214 "TO_JSON" => {
34215 match target {
34216 DialectType::Presto | DialectType::Trino => {
34217 // JSON_FORMAT(CAST(x AS JSON))
34218 let arg = args
34219 .into_iter()
34220 .next()
34221 .unwrap_or(Expression::Null(crate::expressions::Null));
34222 let cast_json = Expression::Cast(Box::new(Cast {
34223 this: arg,
34224 to: DataType::Custom {
34225 name: "JSON".to_string(),
34226 },
34227 trailing_comments: vec![],
34228 double_colon_syntax: false,
34229 format: None,
34230 default: None,
34231 inferred_type: None,
34232 }));
34233 Ok(Expression::Function(Box::new(Function::new(
34234 "JSON_FORMAT".to_string(),
34235 vec![cast_json],
34236 ))))
34237 }
34238 DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
34239 "TO_JSON_STRING".to_string(),
34240 args,
34241 )))),
34242 DialectType::DuckDB => {
34243 // CAST(TO_JSON(x) AS TEXT)
34244 let arg = args
34245 .into_iter()
34246 .next()
34247 .unwrap_or(Expression::Null(crate::expressions::Null));
34248 let to_json = Expression::Function(Box::new(Function::new(
34249 "TO_JSON".to_string(),
34250 vec![arg],
34251 )));
34252 Ok(Expression::Cast(Box::new(Cast {
34253 this: to_json,
34254 to: DataType::Text,
34255 trailing_comments: vec![],
34256 double_colon_syntax: false,
34257 format: None,
34258 default: None,
34259 inferred_type: None,
34260 })))
34261 }
34262 _ => Ok(Expression::Function(Box::new(Function::new(
34263 "TO_JSON".to_string(),
34264 args,
34265 )))),
34266 }
34267 }
34268
34269 // TO_JSON_STRING(x) -> target-specific
34270 "TO_JSON_STRING" => {
34271 match target {
34272 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
34273 Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
34274 ),
34275 DialectType::Presto | DialectType::Trino => {
34276 // JSON_FORMAT(CAST(x AS JSON))
34277 let arg = args
34278 .into_iter()
34279 .next()
34280 .unwrap_or(Expression::Null(crate::expressions::Null));
34281 let cast_json = Expression::Cast(Box::new(Cast {
34282 this: arg,
34283 to: DataType::Custom {
34284 name: "JSON".to_string(),
34285 },
34286 trailing_comments: vec![],
34287 double_colon_syntax: false,
34288 format: None,
34289 default: None,
34290 inferred_type: None,
34291 }));
34292 Ok(Expression::Function(Box::new(Function::new(
34293 "JSON_FORMAT".to_string(),
34294 vec![cast_json],
34295 ))))
34296 }
34297 DialectType::DuckDB => {
34298 // CAST(TO_JSON(x) AS TEXT)
34299 let arg = args
34300 .into_iter()
34301 .next()
34302 .unwrap_or(Expression::Null(crate::expressions::Null));
34303 let to_json = Expression::Function(Box::new(Function::new(
34304 "TO_JSON".to_string(),
34305 vec![arg],
34306 )));
34307 Ok(Expression::Cast(Box::new(Cast {
34308 this: to_json,
34309 to: DataType::Text,
34310 trailing_comments: vec![],
34311 double_colon_syntax: false,
34312 format: None,
34313 default: None,
34314 inferred_type: None,
34315 })))
34316 }
34317 DialectType::Snowflake => {
34318 // TO_JSON(x)
34319 Ok(Expression::Function(Box::new(Function::new(
34320 "TO_JSON".to_string(),
34321 args,
34322 ))))
34323 }
34324 _ => Ok(Expression::Function(Box::new(Function::new(
34325 "TO_JSON_STRING".to_string(),
34326 args,
34327 )))),
34328 }
34329 }
34330
34331 // SAFE_ADD(x, y) -> SafeAdd expression
34332 "SAFE_ADD" if args.len() == 2 => {
34333 let x = args.remove(0);
34334 let y = args.remove(0);
34335 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
34336 this: Box::new(x),
34337 expression: Box::new(y),
34338 })))
34339 }
34340
34341 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
34342 "SAFE_SUBTRACT" if args.len() == 2 => {
34343 let x = args.remove(0);
34344 let y = args.remove(0);
34345 Ok(Expression::SafeSubtract(Box::new(
34346 crate::expressions::SafeSubtract {
34347 this: Box::new(x),
34348 expression: Box::new(y),
34349 },
34350 )))
34351 }
34352
34353 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
34354 "SAFE_MULTIPLY" if args.len() == 2 => {
34355 let x = args.remove(0);
34356 let y = args.remove(0);
34357 Ok(Expression::SafeMultiply(Box::new(
34358 crate::expressions::SafeMultiply {
34359 this: Box::new(x),
34360 expression: Box::new(y),
34361 },
34362 )))
34363 }
34364
34365 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
34366 "REGEXP_CONTAINS" if args.len() == 2 => {
34367 let str_expr = args.remove(0);
34368 let pattern = args.remove(0);
34369 Ok(Expression::RegexpLike(Box::new(
34370 crate::expressions::RegexpFunc {
34371 this: str_expr,
34372 pattern,
34373 flags: None,
34374 },
34375 )))
34376 }
34377
34378 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
34379 "CONTAINS_SUBSTR" if args.len() == 2 => {
34380 let a = args.remove(0);
34381 let b = args.remove(0);
34382 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
34383 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
34384 Ok(Expression::Function(Box::new(Function::new(
34385 "CONTAINS".to_string(),
34386 vec![lower_a, lower_b],
34387 ))))
34388 }
34389
34390 // INT64(x) -> CAST(x AS BIGINT)
34391 "INT64" if args.len() == 1 => {
34392 let arg = args.remove(0);
34393 Ok(Expression::Cast(Box::new(Cast {
34394 this: arg,
34395 to: DataType::BigInt { length: None },
34396 trailing_comments: vec![],
34397 double_colon_syntax: false,
34398 format: None,
34399 default: None,
34400 inferred_type: None,
34401 })))
34402 }
34403
34404 // INSTR(str, substr) -> target-specific
34405 "INSTR" if args.len() >= 2 => {
34406 let str_expr = args.remove(0);
34407 let substr = args.remove(0);
34408 if matches!(target, DialectType::Snowflake) {
34409 // CHARINDEX(substr, str)
34410 Ok(Expression::Function(Box::new(Function::new(
34411 "CHARINDEX".to_string(),
34412 vec![substr, str_expr],
34413 ))))
34414 } else if matches!(target, DialectType::BigQuery) {
34415 // Keep as INSTR
34416 Ok(Expression::Function(Box::new(Function::new(
34417 "INSTR".to_string(),
34418 vec![str_expr, substr],
34419 ))))
34420 } else {
34421 // Default: keep as INSTR
34422 Ok(Expression::Function(Box::new(Function::new(
34423 "INSTR".to_string(),
34424 vec![str_expr, substr],
34425 ))))
34426 }
34427 }
34428
34429 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
34430 "DATE_TRUNC" if args.len() == 2 => {
34431 let expr = args.remove(0);
34432 let unit_expr = args.remove(0);
34433 let unit_str = get_unit_str(&unit_expr);
34434
34435 match target {
34436 DialectType::DuckDB
34437 | DialectType::Snowflake
34438 | DialectType::PostgreSQL
34439 | DialectType::Presto
34440 | DialectType::Trino
34441 | DialectType::Databricks
34442 | DialectType::Spark
34443 | DialectType::Redshift
34444 | DialectType::ClickHouse
34445 | DialectType::TSQL => {
34446 // Standard: DATE_TRUNC('UNIT', expr)
34447 Ok(Expression::Function(Box::new(Function::new(
34448 "DATE_TRUNC".to_string(),
34449 vec![
34450 Expression::Literal(Box::new(Literal::String(unit_str))),
34451 expr,
34452 ],
34453 ))))
34454 }
34455 _ => {
34456 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
34457 Ok(Expression::Function(Box::new(Function::new(
34458 "DATE_TRUNC".to_string(),
34459 vec![expr, unit_expr],
34460 ))))
34461 }
34462 }
34463 }
34464
34465 // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
34466 "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
34467 // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
34468 let ts = args.remove(0);
34469 let unit_expr = args.remove(0);
34470 let tz = if !args.is_empty() {
34471 Some(args.remove(0))
34472 } else {
34473 None
34474 };
34475 let unit_str = get_unit_str(&unit_expr);
34476
34477 match target {
34478 DialectType::DuckDB => {
34479 // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
34480 // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
34481 // Without timezone for MINUTE+ granularity: just DATE_TRUNC
34482 let is_coarse = matches!(
34483 unit_str.as_str(),
34484 "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
34485 );
34486 // For DATETIME_TRUNC, cast string args to TIMESTAMP
34487 let cast_ts = if name == "DATETIME_TRUNC" {
34488 match ts {
34489 Expression::Literal(ref lit)
34490 if matches!(lit.as_ref(), Literal::String(ref _s)) =>
34491 {
34492 Expression::Cast(Box::new(Cast {
34493 this: ts,
34494 to: DataType::Timestamp {
34495 precision: None,
34496 timezone: false,
34497 },
34498 trailing_comments: vec![],
34499 double_colon_syntax: false,
34500 format: None,
34501 default: None,
34502 inferred_type: None,
34503 }))
34504 }
34505 _ => Self::maybe_cast_ts_to_tz(ts, &name),
34506 }
34507 } else {
34508 Self::maybe_cast_ts_to_tz(ts, &name)
34509 };
34510
34511 if let Some(tz_arg) = tz {
34512 if is_coarse {
34513 // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
34514 let at_tz = Expression::AtTimeZone(Box::new(
34515 crate::expressions::AtTimeZone {
34516 this: cast_ts,
34517 zone: tz_arg.clone(),
34518 },
34519 ));
34520 let date_trunc = Expression::Function(Box::new(Function::new(
34521 "DATE_TRUNC".to_string(),
34522 vec![
34523 Expression::Literal(Box::new(Literal::String(unit_str))),
34524 at_tz,
34525 ],
34526 )));
34527 Ok(Expression::AtTimeZone(Box::new(
34528 crate::expressions::AtTimeZone {
34529 this: date_trunc,
34530 zone: tz_arg,
34531 },
34532 )))
34533 } else {
34534 // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
34535 Ok(Expression::Function(Box::new(Function::new(
34536 "DATE_TRUNC".to_string(),
34537 vec![
34538 Expression::Literal(Box::new(Literal::String(unit_str))),
34539 cast_ts,
34540 ],
34541 ))))
34542 }
34543 } else {
34544 // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
34545 Ok(Expression::Function(Box::new(Function::new(
34546 "DATE_TRUNC".to_string(),
34547 vec![
34548 Expression::Literal(Box::new(Literal::String(unit_str))),
34549 cast_ts,
34550 ],
34551 ))))
34552 }
34553 }
34554 DialectType::Databricks | DialectType::Spark => {
34555 // Databricks/Spark: DATE_TRUNC('UNIT', ts)
34556 Ok(Expression::Function(Box::new(Function::new(
34557 "DATE_TRUNC".to_string(),
34558 vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
34559 ))))
34560 }
34561 _ => {
34562 // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
34563 let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
34564 let mut date_trunc_args = vec![unit, ts];
34565 if let Some(tz_arg) = tz {
34566 date_trunc_args.push(tz_arg);
34567 }
34568 Ok(Expression::Function(Box::new(Function::new(
34569 "TIMESTAMP_TRUNC".to_string(),
34570 date_trunc_args,
34571 ))))
34572 }
34573 }
34574 }
34575
34576 // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
34577 "TIME" => {
34578 if args.len() == 3 {
34579 // TIME(h, m, s) constructor
34580 match target {
34581 DialectType::TSQL => {
34582 // TIMEFROMPARTS(h, m, s, 0, 0)
34583 args.push(Expression::number(0));
34584 args.push(Expression::number(0));
34585 Ok(Expression::Function(Box::new(Function::new(
34586 "TIMEFROMPARTS".to_string(),
34587 args,
34588 ))))
34589 }
34590 DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
34591 "MAKETIME".to_string(),
34592 args,
34593 )))),
34594 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
34595 Function::new("MAKE_TIME".to_string(), args),
34596 ))),
34597 _ => Ok(Expression::Function(Box::new(Function::new(
34598 "TIME".to_string(),
34599 args,
34600 )))),
34601 }
34602 } else if args.len() == 1 {
34603 let arg = args.remove(0);
34604 if matches!(target, DialectType::Spark) {
34605 // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
34606 Ok(Expression::Cast(Box::new(Cast {
34607 this: arg,
34608 to: DataType::Timestamp {
34609 timezone: false,
34610 precision: None,
34611 },
34612 trailing_comments: vec![],
34613 double_colon_syntax: false,
34614 format: None,
34615 default: None,
34616 inferred_type: None,
34617 })))
34618 } else {
34619 // Most targets: CAST(x AS TIME)
34620 Ok(Expression::Cast(Box::new(Cast {
34621 this: arg,
34622 to: DataType::Time {
34623 precision: None,
34624 timezone: false,
34625 },
34626 trailing_comments: vec![],
34627 double_colon_syntax: false,
34628 format: None,
34629 default: None,
34630 inferred_type: None,
34631 })))
34632 }
34633 } else if args.len() == 2 {
34634 // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
34635 let expr = args.remove(0);
34636 let tz = args.remove(0);
34637 let cast_tstz = Expression::Cast(Box::new(Cast {
34638 this: expr,
34639 to: DataType::Timestamp {
34640 timezone: true,
34641 precision: None,
34642 },
34643 trailing_comments: vec![],
34644 double_colon_syntax: false,
34645 format: None,
34646 default: None,
34647 inferred_type: None,
34648 }));
34649 let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34650 this: cast_tstz,
34651 zone: tz,
34652 }));
34653 Ok(Expression::Cast(Box::new(Cast {
34654 this: at_tz,
34655 to: DataType::Time {
34656 precision: None,
34657 timezone: false,
34658 },
34659 trailing_comments: vec![],
34660 double_colon_syntax: false,
34661 format: None,
34662 default: None,
34663 inferred_type: None,
34664 })))
34665 } else {
34666 Ok(Expression::Function(Box::new(Function::new(
34667 "TIME".to_string(),
34668 args,
34669 ))))
34670 }
34671 }
34672
34673 // DATETIME('string') -> CAST('string' AS TIMESTAMP)
34674 // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
34675 // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
34676 // DATETIME(y, m, d, h, min, s) -> target-specific
34677 "DATETIME" => {
34678 // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
34679 if matches!(target, DialectType::BigQuery) {
34680 if args.len() == 2 {
34681 let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
34682 if has_time_literal {
34683 let first = args.remove(0);
34684 let second = args.remove(0);
34685 let time_as_cast = match second {
34686 Expression::Literal(lit)
34687 if matches!(lit.as_ref(), Literal::Time(_)) =>
34688 {
34689 let Literal::Time(s) = lit.as_ref() else {
34690 unreachable!()
34691 };
34692 Expression::Cast(Box::new(Cast {
34693 this: Expression::Literal(Box::new(Literal::String(
34694 s.clone(),
34695 ))),
34696 to: DataType::Time {
34697 precision: None,
34698 timezone: false,
34699 },
34700 trailing_comments: vec![],
34701 double_colon_syntax: false,
34702 format: None,
34703 default: None,
34704 inferred_type: None,
34705 }))
34706 }
34707 other => other,
34708 };
34709 return Ok(Expression::Function(Box::new(Function::new(
34710 "DATETIME".to_string(),
34711 vec![first, time_as_cast],
34712 ))));
34713 }
34714 }
34715 return Ok(Expression::Function(Box::new(Function::new(
34716 "DATETIME".to_string(),
34717 args,
34718 ))));
34719 }
34720
34721 if args.len() == 1 {
34722 let arg = args.remove(0);
34723 Ok(Expression::Cast(Box::new(Cast {
34724 this: arg,
34725 to: DataType::Timestamp {
34726 timezone: false,
34727 precision: None,
34728 },
34729 trailing_comments: vec![],
34730 double_colon_syntax: false,
34731 format: None,
34732 default: None,
34733 inferred_type: None,
34734 })))
34735 } else if args.len() == 2 {
34736 let first = args.remove(0);
34737 let second = args.remove(0);
34738 // Check if second arg is a TIME literal
34739 let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
34740 if is_time_literal {
34741 // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
34742 let cast_date = Expression::Cast(Box::new(Cast {
34743 this: first,
34744 to: DataType::Date,
34745 trailing_comments: vec![],
34746 double_colon_syntax: false,
34747 format: None,
34748 default: None,
34749 inferred_type: None,
34750 }));
34751 // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
34752 let time_as_string = match second {
34753 Expression::Literal(lit)
34754 if matches!(lit.as_ref(), Literal::Time(_)) =>
34755 {
34756 let Literal::Time(s) = lit.as_ref() else {
34757 unreachable!()
34758 };
34759 Expression::Literal(Box::new(Literal::String(s.clone())))
34760 }
34761 other => other,
34762 };
34763 let cast_time = Expression::Cast(Box::new(Cast {
34764 this: time_as_string,
34765 to: DataType::Time {
34766 precision: None,
34767 timezone: false,
34768 },
34769 trailing_comments: vec![],
34770 double_colon_syntax: false,
34771 format: None,
34772 default: None,
34773 inferred_type: None,
34774 }));
34775 let add_expr =
34776 Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
34777 Ok(Expression::Cast(Box::new(Cast {
34778 this: add_expr,
34779 to: DataType::Timestamp {
34780 timezone: false,
34781 precision: None,
34782 },
34783 trailing_comments: vec![],
34784 double_colon_syntax: false,
34785 format: None,
34786 default: None,
34787 inferred_type: None,
34788 })))
34789 } else {
34790 // DATETIME('string', 'timezone')
34791 let cast_tstz = Expression::Cast(Box::new(Cast {
34792 this: first,
34793 to: DataType::Timestamp {
34794 timezone: true,
34795 precision: None,
34796 },
34797 trailing_comments: vec![],
34798 double_colon_syntax: false,
34799 format: None,
34800 default: None,
34801 inferred_type: None,
34802 }));
34803 let at_tz =
34804 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34805 this: cast_tstz,
34806 zone: second,
34807 }));
34808 Ok(Expression::Cast(Box::new(Cast {
34809 this: at_tz,
34810 to: DataType::Timestamp {
34811 timezone: false,
34812 precision: None,
34813 },
34814 trailing_comments: vec![],
34815 double_colon_syntax: false,
34816 format: None,
34817 default: None,
34818 inferred_type: None,
34819 })))
34820 }
34821 } else if args.len() >= 3 {
34822 // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
34823 // For other targets, use MAKE_TIMESTAMP or similar
34824 if matches!(target, DialectType::Snowflake) {
34825 Ok(Expression::Function(Box::new(Function::new(
34826 "TIMESTAMP_FROM_PARTS".to_string(),
34827 args,
34828 ))))
34829 } else {
34830 Ok(Expression::Function(Box::new(Function::new(
34831 "DATETIME".to_string(),
34832 args,
34833 ))))
34834 }
34835 } else {
34836 Ok(Expression::Function(Box::new(Function::new(
34837 "DATETIME".to_string(),
34838 args,
34839 ))))
34840 }
34841 }
34842
34843 // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
34844 // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
34845 "TIMESTAMP" => {
34846 if args.len() == 1 {
34847 let arg = args.remove(0);
34848 Ok(Expression::Cast(Box::new(Cast {
34849 this: arg,
34850 to: DataType::Timestamp {
34851 timezone: true,
34852 precision: None,
34853 },
34854 trailing_comments: vec![],
34855 double_colon_syntax: false,
34856 format: None,
34857 default: None,
34858 inferred_type: None,
34859 })))
34860 } else if args.len() == 2 {
34861 let arg = args.remove(0);
34862 let tz = args.remove(0);
34863 let cast_ts = Expression::Cast(Box::new(Cast {
34864 this: arg,
34865 to: DataType::Timestamp {
34866 timezone: false,
34867 precision: None,
34868 },
34869 trailing_comments: vec![],
34870 double_colon_syntax: false,
34871 format: None,
34872 default: None,
34873 inferred_type: None,
34874 }));
34875 if matches!(target, DialectType::Snowflake) {
34876 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
34877 Ok(Expression::Function(Box::new(Function::new(
34878 "CONVERT_TIMEZONE".to_string(),
34879 vec![tz, cast_ts],
34880 ))))
34881 } else {
34882 Ok(Expression::AtTimeZone(Box::new(
34883 crate::expressions::AtTimeZone {
34884 this: cast_ts,
34885 zone: tz,
34886 },
34887 )))
34888 }
34889 } else {
34890 Ok(Expression::Function(Box::new(Function::new(
34891 "TIMESTAMP".to_string(),
34892 args,
34893 ))))
34894 }
34895 }
34896
34897 // STRING(x) -> CAST(x AS VARCHAR/TEXT)
34898 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
34899 "STRING" => {
34900 if args.len() == 1 {
34901 let arg = args.remove(0);
34902 let cast_type = match target {
34903 DialectType::DuckDB => DataType::Text,
34904 _ => DataType::VarChar {
34905 length: None,
34906 parenthesized_length: false,
34907 },
34908 };
34909 Ok(Expression::Cast(Box::new(Cast {
34910 this: arg,
34911 to: cast_type,
34912 trailing_comments: vec![],
34913 double_colon_syntax: false,
34914 format: None,
34915 default: None,
34916 inferred_type: None,
34917 })))
34918 } else if args.len() == 2 {
34919 let arg = args.remove(0);
34920 let tz = args.remove(0);
34921 let cast_type = match target {
34922 DialectType::DuckDB => DataType::Text,
34923 _ => DataType::VarChar {
34924 length: None,
34925 parenthesized_length: false,
34926 },
34927 };
34928 if matches!(target, DialectType::Snowflake) {
34929 // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
34930 let convert_tz = Expression::Function(Box::new(Function::new(
34931 "CONVERT_TIMEZONE".to_string(),
34932 vec![
34933 Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
34934 tz,
34935 arg,
34936 ],
34937 )));
34938 Ok(Expression::Cast(Box::new(Cast {
34939 this: convert_tz,
34940 to: cast_type,
34941 trailing_comments: vec![],
34942 double_colon_syntax: false,
34943 format: None,
34944 default: None,
34945 inferred_type: None,
34946 })))
34947 } else {
34948 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
34949 let cast_ts = Expression::Cast(Box::new(Cast {
34950 this: arg,
34951 to: DataType::Timestamp {
34952 timezone: false,
34953 precision: None,
34954 },
34955 trailing_comments: vec![],
34956 double_colon_syntax: false,
34957 format: None,
34958 default: None,
34959 inferred_type: None,
34960 }));
34961 let at_utc =
34962 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34963 this: cast_ts,
34964 zone: Expression::Literal(Box::new(Literal::String(
34965 "UTC".to_string(),
34966 ))),
34967 }));
34968 let at_tz =
34969 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34970 this: at_utc,
34971 zone: tz,
34972 }));
34973 Ok(Expression::Cast(Box::new(Cast {
34974 this: at_tz,
34975 to: cast_type,
34976 trailing_comments: vec![],
34977 double_colon_syntax: false,
34978 format: None,
34979 default: None,
34980 inferred_type: None,
34981 })))
34982 }
34983 } else {
34984 Ok(Expression::Function(Box::new(Function::new(
34985 "STRING".to_string(),
34986 args,
34987 ))))
34988 }
34989 }
34990
34991 // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
34992 "UNIX_SECONDS" if args.len() == 1 => {
34993 let ts = args.remove(0);
34994 match target {
34995 DialectType::DuckDB => {
34996 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
34997 let cast_ts = Self::ensure_cast_timestamptz(ts);
34998 let epoch = Expression::Function(Box::new(Function::new(
34999 "EPOCH".to_string(),
35000 vec![cast_ts],
35001 )));
35002 Ok(Expression::Cast(Box::new(Cast {
35003 this: epoch,
35004 to: DataType::BigInt { length: None },
35005 trailing_comments: vec![],
35006 double_colon_syntax: false,
35007 format: None,
35008 default: None,
35009 inferred_type: None,
35010 })))
35011 }
35012 DialectType::Snowflake => {
35013 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
35014 let epoch = Expression::Cast(Box::new(Cast {
35015 this: Expression::Literal(Box::new(Literal::String(
35016 "1970-01-01 00:00:00+00".to_string(),
35017 ))),
35018 to: DataType::Timestamp {
35019 timezone: true,
35020 precision: None,
35021 },
35022 trailing_comments: vec![],
35023 double_colon_syntax: false,
35024 format: None,
35025 default: None,
35026 inferred_type: None,
35027 }));
35028 Ok(Expression::TimestampDiff(Box::new(
35029 crate::expressions::TimestampDiff {
35030 this: Box::new(epoch),
35031 expression: Box::new(ts),
35032 unit: Some("SECONDS".to_string()),
35033 },
35034 )))
35035 }
35036 _ => Ok(Expression::Function(Box::new(Function::new(
35037 "UNIX_SECONDS".to_string(),
35038 vec![ts],
35039 )))),
35040 }
35041 }
35042
35043 "UNIX_MILLIS" if args.len() == 1 => {
35044 let ts = args.remove(0);
35045 match target {
35046 DialectType::DuckDB => {
35047 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
35048 let cast_ts = Self::ensure_cast_timestamptz(ts);
35049 Ok(Expression::Function(Box::new(Function::new(
35050 "EPOCH_MS".to_string(),
35051 vec![cast_ts],
35052 ))))
35053 }
35054 _ => Ok(Expression::Function(Box::new(Function::new(
35055 "UNIX_MILLIS".to_string(),
35056 vec![ts],
35057 )))),
35058 }
35059 }
35060
35061 "UNIX_MICROS" if args.len() == 1 => {
35062 let ts = args.remove(0);
35063 match target {
35064 DialectType::DuckDB => {
35065 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
35066 let cast_ts = Self::ensure_cast_timestamptz(ts);
35067 Ok(Expression::Function(Box::new(Function::new(
35068 "EPOCH_US".to_string(),
35069 vec![cast_ts],
35070 ))))
35071 }
35072 _ => Ok(Expression::Function(Box::new(Function::new(
35073 "UNIX_MICROS".to_string(),
35074 vec![ts],
35075 )))),
35076 }
35077 }
35078
35079 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
35080 "ARRAY_CONCAT" | "LIST_CONCAT" => {
35081 match target {
35082 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35083 // CONCAT(arr1, arr2, ...)
35084 Ok(Expression::Function(Box::new(Function::new(
35085 "CONCAT".to_string(),
35086 args,
35087 ))))
35088 }
35089 DialectType::Presto | DialectType::Trino => {
35090 // CONCAT(arr1, arr2, ...)
35091 Ok(Expression::Function(Box::new(Function::new(
35092 "CONCAT".to_string(),
35093 args,
35094 ))))
35095 }
35096 DialectType::Snowflake => {
35097 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
35098 if args.len() == 1 {
35099 // ARRAY_CAT requires 2 args, add empty array as []
35100 let empty_arr = Expression::ArrayFunc(Box::new(
35101 crate::expressions::ArrayConstructor {
35102 expressions: vec![],
35103 bracket_notation: true,
35104 use_list_keyword: false,
35105 },
35106 ));
35107 let mut new_args = args;
35108 new_args.push(empty_arr);
35109 Ok(Expression::Function(Box::new(Function::new(
35110 "ARRAY_CAT".to_string(),
35111 new_args,
35112 ))))
35113 } else if args.is_empty() {
35114 Ok(Expression::Function(Box::new(Function::new(
35115 "ARRAY_CAT".to_string(),
35116 args,
35117 ))))
35118 } else {
35119 let mut it = args.into_iter().rev();
35120 let mut result = it.next().unwrap();
35121 for arr in it {
35122 result = Expression::Function(Box::new(Function::new(
35123 "ARRAY_CAT".to_string(),
35124 vec![arr, result],
35125 )));
35126 }
35127 Ok(result)
35128 }
35129 }
35130 DialectType::PostgreSQL => {
35131 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
35132 if args.len() <= 1 {
35133 Ok(Expression::Function(Box::new(Function::new(
35134 "ARRAY_CAT".to_string(),
35135 args,
35136 ))))
35137 } else {
35138 let mut it = args.into_iter().rev();
35139 let mut result = it.next().unwrap();
35140 for arr in it {
35141 result = Expression::Function(Box::new(Function::new(
35142 "ARRAY_CAT".to_string(),
35143 vec![arr, result],
35144 )));
35145 }
35146 Ok(result)
35147 }
35148 }
35149 DialectType::Redshift => {
35150 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
35151 if args.len() <= 2 {
35152 Ok(Expression::Function(Box::new(Function::new(
35153 "ARRAY_CONCAT".to_string(),
35154 args,
35155 ))))
35156 } else {
35157 let mut it = args.into_iter().rev();
35158 let mut result = it.next().unwrap();
35159 for arr in it {
35160 result = Expression::Function(Box::new(Function::new(
35161 "ARRAY_CONCAT".to_string(),
35162 vec![arr, result],
35163 )));
35164 }
35165 Ok(result)
35166 }
35167 }
35168 DialectType::DuckDB => {
35169 // LIST_CONCAT supports multiple args natively in DuckDB
35170 Ok(Expression::Function(Box::new(Function::new(
35171 "LIST_CONCAT".to_string(),
35172 args,
35173 ))))
35174 }
35175 _ => Ok(Expression::Function(Box::new(Function::new(
35176 "ARRAY_CONCAT".to_string(),
35177 args,
35178 )))),
35179 }
35180 }
35181
35182 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
35183 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
35184 let arg = args.remove(0);
35185 match target {
35186 DialectType::Snowflake => {
35187 let array_agg =
35188 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
35189 this: arg,
35190 distinct: false,
35191 filter: None,
35192 order_by: vec![],
35193 name: None,
35194 ignore_nulls: None,
35195 having_max: None,
35196 limit: None,
35197 inferred_type: None,
35198 }));
35199 Ok(Expression::Function(Box::new(Function::new(
35200 "ARRAY_FLATTEN".to_string(),
35201 vec![array_agg],
35202 ))))
35203 }
35204 _ => Ok(Expression::Function(Box::new(Function::new(
35205 "ARRAY_CONCAT_AGG".to_string(),
35206 vec![arg],
35207 )))),
35208 }
35209 }
35210
35211 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
35212 "MD5" if args.len() == 1 => {
35213 let arg = args.remove(0);
35214 match target {
35215 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35216 // UNHEX(MD5(x))
35217 let md5 = Expression::Function(Box::new(Function::new(
35218 "MD5".to_string(),
35219 vec![arg],
35220 )));
35221 Ok(Expression::Function(Box::new(Function::new(
35222 "UNHEX".to_string(),
35223 vec![md5],
35224 ))))
35225 }
35226 DialectType::Snowflake => {
35227 // MD5_BINARY(x)
35228 Ok(Expression::Function(Box::new(Function::new(
35229 "MD5_BINARY".to_string(),
35230 vec![arg],
35231 ))))
35232 }
35233 _ => Ok(Expression::Function(Box::new(Function::new(
35234 "MD5".to_string(),
35235 vec![arg],
35236 )))),
35237 }
35238 }
35239
35240 "SHA1" if args.len() == 1 => {
35241 let arg = args.remove(0);
35242 match target {
35243 DialectType::DuckDB => {
35244 // UNHEX(SHA1(x))
35245 let sha1 = Expression::Function(Box::new(Function::new(
35246 "SHA1".to_string(),
35247 vec![arg],
35248 )));
35249 Ok(Expression::Function(Box::new(Function::new(
35250 "UNHEX".to_string(),
35251 vec![sha1],
35252 ))))
35253 }
35254 _ => Ok(Expression::Function(Box::new(Function::new(
35255 "SHA1".to_string(),
35256 vec![arg],
35257 )))),
35258 }
35259 }
35260
35261 "SHA256" if args.len() == 1 => {
35262 let arg = args.remove(0);
35263 match target {
35264 DialectType::DuckDB => {
35265 // UNHEX(SHA256(x))
35266 let sha = Expression::Function(Box::new(Function::new(
35267 "SHA256".to_string(),
35268 vec![arg],
35269 )));
35270 Ok(Expression::Function(Box::new(Function::new(
35271 "UNHEX".to_string(),
35272 vec![sha],
35273 ))))
35274 }
35275 DialectType::Snowflake => {
35276 // SHA2_BINARY(x, 256)
35277 Ok(Expression::Function(Box::new(Function::new(
35278 "SHA2_BINARY".to_string(),
35279 vec![arg, Expression::number(256)],
35280 ))))
35281 }
35282 DialectType::Redshift | DialectType::Spark => {
35283 // SHA2(x, 256)
35284 Ok(Expression::Function(Box::new(Function::new(
35285 "SHA2".to_string(),
35286 vec![arg, Expression::number(256)],
35287 ))))
35288 }
35289 _ => Ok(Expression::Function(Box::new(Function::new(
35290 "SHA256".to_string(),
35291 vec![arg],
35292 )))),
35293 }
35294 }
35295
35296 "SHA512" if args.len() == 1 => {
35297 let arg = args.remove(0);
35298 match target {
35299 DialectType::Snowflake => {
35300 // SHA2_BINARY(x, 512)
35301 Ok(Expression::Function(Box::new(Function::new(
35302 "SHA2_BINARY".to_string(),
35303 vec![arg, Expression::number(512)],
35304 ))))
35305 }
35306 DialectType::Redshift | DialectType::Spark => {
35307 // SHA2(x, 512)
35308 Ok(Expression::Function(Box::new(Function::new(
35309 "SHA2".to_string(),
35310 vec![arg, Expression::number(512)],
35311 ))))
35312 }
35313 _ => Ok(Expression::Function(Box::new(Function::new(
35314 "SHA512".to_string(),
35315 vec![arg],
35316 )))),
35317 }
35318 }
35319
35320 // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
35321 "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
35322 let str_expr = args.remove(0);
35323 let pattern = args.remove(0);
35324
35325 // Check if pattern contains capturing groups (parentheses)
35326 let has_groups = match &pattern {
35327 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
35328 let Literal::String(s) = lit.as_ref() else {
35329 unreachable!()
35330 };
35331 s.contains('(') && s.contains(')')
35332 }
35333 _ => false,
35334 };
35335
35336 match target {
35337 DialectType::DuckDB => {
35338 let group = if has_groups {
35339 Expression::number(1)
35340 } else {
35341 Expression::number(0)
35342 };
35343 Ok(Expression::Function(Box::new(Function::new(
35344 "REGEXP_EXTRACT_ALL".to_string(),
35345 vec![str_expr, pattern, group],
35346 ))))
35347 }
35348 DialectType::Spark | DialectType::Databricks => {
35349 // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
35350 if has_groups {
35351 Ok(Expression::Function(Box::new(Function::new(
35352 "REGEXP_EXTRACT_ALL".to_string(),
35353 vec![str_expr, pattern],
35354 ))))
35355 } else {
35356 Ok(Expression::Function(Box::new(Function::new(
35357 "REGEXP_EXTRACT_ALL".to_string(),
35358 vec![str_expr, pattern, Expression::number(0)],
35359 ))))
35360 }
35361 }
35362 DialectType::Presto | DialectType::Trino => {
35363 if has_groups {
35364 Ok(Expression::Function(Box::new(Function::new(
35365 "REGEXP_EXTRACT_ALL".to_string(),
35366 vec![str_expr, pattern, Expression::number(1)],
35367 ))))
35368 } else {
35369 Ok(Expression::Function(Box::new(Function::new(
35370 "REGEXP_EXTRACT_ALL".to_string(),
35371 vec![str_expr, pattern],
35372 ))))
35373 }
35374 }
35375 DialectType::Snowflake => {
35376 if has_groups {
35377 // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
35378 Ok(Expression::Function(Box::new(Function::new(
35379 "REGEXP_EXTRACT_ALL".to_string(),
35380 vec![
35381 str_expr,
35382 pattern,
35383 Expression::number(1),
35384 Expression::number(1),
35385 Expression::Literal(Box::new(Literal::String("c".to_string()))),
35386 Expression::number(1),
35387 ],
35388 ))))
35389 } else {
35390 Ok(Expression::Function(Box::new(Function::new(
35391 "REGEXP_EXTRACT_ALL".to_string(),
35392 vec![str_expr, pattern],
35393 ))))
35394 }
35395 }
35396 _ => Ok(Expression::Function(Box::new(Function::new(
35397 "REGEXP_EXTRACT_ALL".to_string(),
35398 vec![str_expr, pattern],
35399 )))),
35400 }
35401 }
35402
35403 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
35404 "MOD" if args.len() == 2 => {
35405 match target {
35406 DialectType::PostgreSQL
35407 | DialectType::DuckDB
35408 | DialectType::Presto
35409 | DialectType::Trino
35410 | DialectType::Athena
35411 | DialectType::Snowflake => {
35412 let x = args.remove(0);
35413 let y = args.remove(0);
35414 // Wrap complex expressions in parens to preserve precedence
35415 let needs_paren = |e: &Expression| {
35416 matches!(
35417 e,
35418 Expression::Add(_)
35419 | Expression::Sub(_)
35420 | Expression::Mul(_)
35421 | Expression::Div(_)
35422 )
35423 };
35424 let x = if needs_paren(&x) {
35425 Expression::Paren(Box::new(crate::expressions::Paren {
35426 this: x,
35427 trailing_comments: vec![],
35428 }))
35429 } else {
35430 x
35431 };
35432 let y = if needs_paren(&y) {
35433 Expression::Paren(Box::new(crate::expressions::Paren {
35434 this: y,
35435 trailing_comments: vec![],
35436 }))
35437 } else {
35438 y
35439 };
35440 Ok(Expression::Mod(Box::new(
35441 crate::expressions::BinaryOp::new(x, y),
35442 )))
35443 }
35444 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
35445 // Hive/Spark: a % b
35446 let x = args.remove(0);
35447 let y = args.remove(0);
35448 let needs_paren = |e: &Expression| {
35449 matches!(
35450 e,
35451 Expression::Add(_)
35452 | Expression::Sub(_)
35453 | Expression::Mul(_)
35454 | Expression::Div(_)
35455 )
35456 };
35457 let x = if needs_paren(&x) {
35458 Expression::Paren(Box::new(crate::expressions::Paren {
35459 this: x,
35460 trailing_comments: vec![],
35461 }))
35462 } else {
35463 x
35464 };
35465 let y = if needs_paren(&y) {
35466 Expression::Paren(Box::new(crate::expressions::Paren {
35467 this: y,
35468 trailing_comments: vec![],
35469 }))
35470 } else {
35471 y
35472 };
35473 Ok(Expression::Mod(Box::new(
35474 crate::expressions::BinaryOp::new(x, y),
35475 )))
35476 }
35477 _ => Ok(Expression::Function(Box::new(Function::new(
35478 "MOD".to_string(),
35479 args,
35480 )))),
35481 }
35482 }
35483
35484 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
35485 "ARRAY_FILTER" if args.len() == 2 => {
35486 let name = match target {
35487 DialectType::DuckDB => "LIST_FILTER",
35488 DialectType::StarRocks => "ARRAY_FILTER",
35489 _ => "FILTER",
35490 };
35491 Ok(Expression::Function(Box::new(Function::new(
35492 name.to_string(),
35493 args,
35494 ))))
35495 }
35496 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
35497 "FILTER" if args.len() == 2 => {
35498 let name = match target {
35499 DialectType::DuckDB => "LIST_FILTER",
35500 DialectType::StarRocks => "ARRAY_FILTER",
35501 _ => "FILTER",
35502 };
35503 Ok(Expression::Function(Box::new(Function::new(
35504 name.to_string(),
35505 args,
35506 ))))
35507 }
35508 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
35509 "REDUCE" if args.len() >= 3 => {
35510 let name = match target {
35511 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
35512 _ => "REDUCE",
35513 };
35514 Ok(Expression::Function(Box::new(Function::new(
35515 name.to_string(),
35516 args,
35517 ))))
35518 }
35519 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
35520 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
35521 Function::new("ARRAY_REVERSE".to_string(), args),
35522 ))),
35523
35524 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
35525 "CONCAT" if args.len() > 2 => match target {
35526 DialectType::DuckDB => {
35527 let mut it = args.into_iter();
35528 let mut result = it.next().unwrap();
35529 for arg in it {
35530 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
35531 this: Box::new(result),
35532 expression: Box::new(arg),
35533 safe: None,
35534 }));
35535 }
35536 Ok(result)
35537 }
35538 _ => Ok(Expression::Function(Box::new(Function::new(
35539 "CONCAT".to_string(),
35540 args,
35541 )))),
35542 },
35543
35544 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
35545 "GENERATE_DATE_ARRAY" => {
35546 if matches!(target, DialectType::BigQuery) {
35547 // BQ->BQ: add default interval if not present
35548 if args.len() == 2 {
35549 let start = args.remove(0);
35550 let end = args.remove(0);
35551 let default_interval =
35552 Expression::Interval(Box::new(crate::expressions::Interval {
35553 this: Some(Expression::Literal(Box::new(Literal::String(
35554 "1".to_string(),
35555 )))),
35556 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35557 unit: crate::expressions::IntervalUnit::Day,
35558 use_plural: false,
35559 }),
35560 }));
35561 Ok(Expression::Function(Box::new(Function::new(
35562 "GENERATE_DATE_ARRAY".to_string(),
35563 vec![start, end, default_interval],
35564 ))))
35565 } else {
35566 Ok(Expression::Function(Box::new(Function::new(
35567 "GENERATE_DATE_ARRAY".to_string(),
35568 args,
35569 ))))
35570 }
35571 } else if matches!(target, DialectType::DuckDB) {
35572 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
35573 let start = args.get(0).cloned();
35574 let end = args.get(1).cloned();
35575 let step = args.get(2).cloned().or_else(|| {
35576 Some(Expression::Interval(Box::new(
35577 crate::expressions::Interval {
35578 this: Some(Expression::Literal(Box::new(Literal::String(
35579 "1".to_string(),
35580 )))),
35581 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35582 unit: crate::expressions::IntervalUnit::Day,
35583 use_plural: false,
35584 }),
35585 },
35586 )))
35587 });
35588
35589 // Wrap start/end in CAST(... AS DATE) only for string literals
35590 let maybe_cast_date = |expr: Expression| -> Expression {
35591 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
35592 {
35593 Expression::Cast(Box::new(Cast {
35594 this: expr,
35595 to: DataType::Date,
35596 trailing_comments: vec![],
35597 double_colon_syntax: false,
35598 format: None,
35599 default: None,
35600 inferred_type: None,
35601 }))
35602 } else {
35603 expr
35604 }
35605 };
35606 let cast_start = start.map(maybe_cast_date);
35607 let cast_end = end.map(maybe_cast_date);
35608
35609 let gen_series =
35610 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
35611 start: cast_start.map(Box::new),
35612 end: cast_end.map(Box::new),
35613 step: step.map(Box::new),
35614 is_end_exclusive: None,
35615 }));
35616
35617 // Wrap in CAST(... AS DATE[])
35618 Ok(Expression::Cast(Box::new(Cast {
35619 this: gen_series,
35620 to: DataType::Array {
35621 element_type: Box::new(DataType::Date),
35622 dimension: None,
35623 },
35624 trailing_comments: vec![],
35625 double_colon_syntax: false,
35626 format: None,
35627 default: None,
35628 inferred_type: None,
35629 })))
35630 } else if matches!(target, DialectType::Snowflake) {
35631 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
35632 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
35633 if args.len() == 2 {
35634 let start = args.remove(0);
35635 let end = args.remove(0);
35636 let default_interval =
35637 Expression::Interval(Box::new(crate::expressions::Interval {
35638 this: Some(Expression::Literal(Box::new(Literal::String(
35639 "1".to_string(),
35640 )))),
35641 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35642 unit: crate::expressions::IntervalUnit::Day,
35643 use_plural: false,
35644 }),
35645 }));
35646 Ok(Expression::Function(Box::new(Function::new(
35647 "GENERATE_DATE_ARRAY".to_string(),
35648 vec![start, end, default_interval],
35649 ))))
35650 } else {
35651 Ok(Expression::Function(Box::new(Function::new(
35652 "GENERATE_DATE_ARRAY".to_string(),
35653 args,
35654 ))))
35655 }
35656 } else {
35657 // Convert to GenerateSeries for other targets
35658 let start = args.get(0).cloned();
35659 let end = args.get(1).cloned();
35660 let step = args.get(2).cloned().or_else(|| {
35661 Some(Expression::Interval(Box::new(
35662 crate::expressions::Interval {
35663 this: Some(Expression::Literal(Box::new(Literal::String(
35664 "1".to_string(),
35665 )))),
35666 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35667 unit: crate::expressions::IntervalUnit::Day,
35668 use_plural: false,
35669 }),
35670 },
35671 )))
35672 });
35673 Ok(Expression::GenerateSeries(Box::new(
35674 crate::expressions::GenerateSeries {
35675 start: start.map(Box::new),
35676 end: end.map(Box::new),
35677 step: step.map(Box::new),
35678 is_end_exclusive: None,
35679 },
35680 )))
35681 }
35682 }
35683
35684 // PARSE_DATE(format, str) -> target-specific
35685 "PARSE_DATE" if args.len() == 2 => {
35686 let format = args.remove(0);
35687 let str_expr = args.remove(0);
35688 match target {
35689 DialectType::DuckDB => {
35690 // CAST(STRPTIME(str, duck_format) AS DATE)
35691 let duck_format = Self::bq_format_to_duckdb(&format);
35692 let strptime = Expression::Function(Box::new(Function::new(
35693 "STRPTIME".to_string(),
35694 vec![str_expr, duck_format],
35695 )));
35696 Ok(Expression::Cast(Box::new(Cast {
35697 this: strptime,
35698 to: DataType::Date,
35699 trailing_comments: vec![],
35700 double_colon_syntax: false,
35701 format: None,
35702 default: None,
35703 inferred_type: None,
35704 })))
35705 }
35706 DialectType::Snowflake => {
35707 // _POLYGLOT_DATE(str, snowflake_format)
35708 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
35709 let sf_format = Self::bq_format_to_snowflake(&format);
35710 Ok(Expression::Function(Box::new(Function::new(
35711 "_POLYGLOT_DATE".to_string(),
35712 vec![str_expr, sf_format],
35713 ))))
35714 }
35715 _ => Ok(Expression::Function(Box::new(Function::new(
35716 "PARSE_DATE".to_string(),
35717 vec![format, str_expr],
35718 )))),
35719 }
35720 }
35721
35722 // PARSE_TIMESTAMP(format, str) -> target-specific
35723 "PARSE_TIMESTAMP" if args.len() >= 2 => {
35724 let format = args.remove(0);
35725 let str_expr = args.remove(0);
35726 let tz = if !args.is_empty() {
35727 Some(args.remove(0))
35728 } else {
35729 None
35730 };
35731 match target {
35732 DialectType::DuckDB => {
35733 let duck_format = Self::bq_format_to_duckdb(&format);
35734 let strptime = Expression::Function(Box::new(Function::new(
35735 "STRPTIME".to_string(),
35736 vec![str_expr, duck_format],
35737 )));
35738 Ok(strptime)
35739 }
35740 _ => {
35741 let mut result_args = vec![format, str_expr];
35742 if let Some(tz_arg) = tz {
35743 result_args.push(tz_arg);
35744 }
35745 Ok(Expression::Function(Box::new(Function::new(
35746 "PARSE_TIMESTAMP".to_string(),
35747 result_args,
35748 ))))
35749 }
35750 }
35751 }
35752
35753 // FORMAT_DATE(format, date) -> target-specific
35754 "FORMAT_DATE" if args.len() == 2 => {
35755 let format = args.remove(0);
35756 let date_expr = args.remove(0);
35757 match target {
35758 DialectType::DuckDB => {
35759 // STRFTIME(CAST(date AS DATE), format)
35760 let cast_date = Expression::Cast(Box::new(Cast {
35761 this: date_expr,
35762 to: DataType::Date,
35763 trailing_comments: vec![],
35764 double_colon_syntax: false,
35765 format: None,
35766 default: None,
35767 inferred_type: None,
35768 }));
35769 Ok(Expression::Function(Box::new(Function::new(
35770 "STRFTIME".to_string(),
35771 vec![cast_date, format],
35772 ))))
35773 }
35774 _ => Ok(Expression::Function(Box::new(Function::new(
35775 "FORMAT_DATE".to_string(),
35776 vec![format, date_expr],
35777 )))),
35778 }
35779 }
35780
35781 // FORMAT_DATETIME(format, datetime) -> target-specific
35782 "FORMAT_DATETIME" if args.len() == 2 => {
35783 let format = args.remove(0);
35784 let dt_expr = args.remove(0);
35785
35786 if matches!(target, DialectType::BigQuery) {
35787 // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
35788 let norm_format = Self::bq_format_normalize_bq(&format);
35789 // Also strip DATETIME keyword from typed literals
35790 let norm_dt = match dt_expr {
35791 Expression::Literal(lit)
35792 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
35793 {
35794 let Literal::Timestamp(s) = lit.as_ref() else {
35795 unreachable!()
35796 };
35797 Expression::Cast(Box::new(Cast {
35798 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35799 to: DataType::Custom {
35800 name: "DATETIME".to_string(),
35801 },
35802 trailing_comments: vec![],
35803 double_colon_syntax: false,
35804 format: None,
35805 default: None,
35806 inferred_type: None,
35807 }))
35808 }
35809 other => other,
35810 };
35811 return Ok(Expression::Function(Box::new(Function::new(
35812 "FORMAT_DATETIME".to_string(),
35813 vec![norm_format, norm_dt],
35814 ))));
35815 }
35816
35817 match target {
35818 DialectType::DuckDB => {
35819 // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
35820 let cast_dt = Self::ensure_cast_timestamp(dt_expr);
35821 let duck_format = Self::bq_format_to_duckdb(&format);
35822 Ok(Expression::Function(Box::new(Function::new(
35823 "STRFTIME".to_string(),
35824 vec![cast_dt, duck_format],
35825 ))))
35826 }
35827 _ => Ok(Expression::Function(Box::new(Function::new(
35828 "FORMAT_DATETIME".to_string(),
35829 vec![format, dt_expr],
35830 )))),
35831 }
35832 }
35833
35834 // FORMAT_TIMESTAMP(format, ts) -> target-specific
35835 "FORMAT_TIMESTAMP" if args.len() == 2 => {
35836 let format = args.remove(0);
35837 let ts_expr = args.remove(0);
35838 match target {
35839 DialectType::DuckDB => {
35840 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
35841 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
35842 let cast_ts = Expression::Cast(Box::new(Cast {
35843 this: cast_tstz,
35844 to: DataType::Timestamp {
35845 timezone: false,
35846 precision: None,
35847 },
35848 trailing_comments: vec![],
35849 double_colon_syntax: false,
35850 format: None,
35851 default: None,
35852 inferred_type: None,
35853 }));
35854 Ok(Expression::Function(Box::new(Function::new(
35855 "STRFTIME".to_string(),
35856 vec![cast_ts, format],
35857 ))))
35858 }
35859 DialectType::Snowflake => {
35860 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
35861 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
35862 let cast_ts = Expression::Cast(Box::new(Cast {
35863 this: cast_tstz,
35864 to: DataType::Timestamp {
35865 timezone: false,
35866 precision: None,
35867 },
35868 trailing_comments: vec![],
35869 double_colon_syntax: false,
35870 format: None,
35871 default: None,
35872 inferred_type: None,
35873 }));
35874 let sf_format = Self::bq_format_to_snowflake(&format);
35875 Ok(Expression::Function(Box::new(Function::new(
35876 "TO_CHAR".to_string(),
35877 vec![cast_ts, sf_format],
35878 ))))
35879 }
35880 _ => Ok(Expression::Function(Box::new(Function::new(
35881 "FORMAT_TIMESTAMP".to_string(),
35882 vec![format, ts_expr],
35883 )))),
35884 }
35885 }
35886
35887 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
35888 "UNIX_DATE" if args.len() == 1 => {
35889 let date = args.remove(0);
35890 match target {
35891 DialectType::DuckDB => {
35892 let epoch = Expression::Cast(Box::new(Cast {
35893 this: Expression::Literal(Box::new(Literal::String(
35894 "1970-01-01".to_string(),
35895 ))),
35896 to: DataType::Date,
35897 trailing_comments: vec![],
35898 double_colon_syntax: false,
35899 format: None,
35900 default: None,
35901 inferred_type: None,
35902 }));
35903 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
35904 // Need to convert DATE literal to CAST
35905 let norm_date = Self::date_literal_to_cast(date);
35906 Ok(Expression::Function(Box::new(Function::new(
35907 "DATE_DIFF".to_string(),
35908 vec![
35909 Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
35910 epoch,
35911 norm_date,
35912 ],
35913 ))))
35914 }
35915 _ => Ok(Expression::Function(Box::new(Function::new(
35916 "UNIX_DATE".to_string(),
35917 vec![date],
35918 )))),
35919 }
35920 }
35921
35922 // UNIX_SECONDS(ts) -> target-specific
35923 "UNIX_SECONDS" if args.len() == 1 => {
35924 let ts = args.remove(0);
35925 match target {
35926 DialectType::DuckDB => {
35927 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
35928 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35929 let epoch = Expression::Function(Box::new(Function::new(
35930 "EPOCH".to_string(),
35931 vec![norm_ts],
35932 )));
35933 Ok(Expression::Cast(Box::new(Cast {
35934 this: epoch,
35935 to: DataType::BigInt { length: None },
35936 trailing_comments: vec![],
35937 double_colon_syntax: false,
35938 format: None,
35939 default: None,
35940 inferred_type: None,
35941 })))
35942 }
35943 DialectType::Snowflake => {
35944 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
35945 let epoch = Expression::Cast(Box::new(Cast {
35946 this: Expression::Literal(Box::new(Literal::String(
35947 "1970-01-01 00:00:00+00".to_string(),
35948 ))),
35949 to: DataType::Timestamp {
35950 timezone: true,
35951 precision: None,
35952 },
35953 trailing_comments: vec![],
35954 double_colon_syntax: false,
35955 format: None,
35956 default: None,
35957 inferred_type: None,
35958 }));
35959 Ok(Expression::Function(Box::new(Function::new(
35960 "TIMESTAMPDIFF".to_string(),
35961 vec![
35962 Expression::Identifier(Identifier::new("SECONDS".to_string())),
35963 epoch,
35964 ts,
35965 ],
35966 ))))
35967 }
35968 _ => Ok(Expression::Function(Box::new(Function::new(
35969 "UNIX_SECONDS".to_string(),
35970 vec![ts],
35971 )))),
35972 }
35973 }
35974
35975 // UNIX_MILLIS(ts) -> target-specific
35976 "UNIX_MILLIS" if args.len() == 1 => {
35977 let ts = args.remove(0);
35978 match target {
35979 DialectType::DuckDB => {
35980 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35981 Ok(Expression::Function(Box::new(Function::new(
35982 "EPOCH_MS".to_string(),
35983 vec![norm_ts],
35984 ))))
35985 }
35986 _ => Ok(Expression::Function(Box::new(Function::new(
35987 "UNIX_MILLIS".to_string(),
35988 vec![ts],
35989 )))),
35990 }
35991 }
35992
35993 // UNIX_MICROS(ts) -> target-specific
35994 "UNIX_MICROS" if args.len() == 1 => {
35995 let ts = args.remove(0);
35996 match target {
35997 DialectType::DuckDB => {
35998 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35999 Ok(Expression::Function(Box::new(Function::new(
36000 "EPOCH_US".to_string(),
36001 vec![norm_ts],
36002 ))))
36003 }
36004 _ => Ok(Expression::Function(Box::new(Function::new(
36005 "UNIX_MICROS".to_string(),
36006 vec![ts],
36007 )))),
36008 }
36009 }
36010
36011 // INSTR(str, substr) -> target-specific
36012 "INSTR" => {
36013 if matches!(target, DialectType::BigQuery) {
36014 // BQ->BQ: keep as INSTR
36015 Ok(Expression::Function(Box::new(Function::new(
36016 "INSTR".to_string(),
36017 args,
36018 ))))
36019 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
36020 // Snowflake: CHARINDEX(substr, str) - swap args
36021 let str_expr = args.remove(0);
36022 let substr = args.remove(0);
36023 Ok(Expression::Function(Box::new(Function::new(
36024 "CHARINDEX".to_string(),
36025 vec![substr, str_expr],
36026 ))))
36027 } else {
36028 // Keep as INSTR for other targets
36029 Ok(Expression::Function(Box::new(Function::new(
36030 "INSTR".to_string(),
36031 args,
36032 ))))
36033 }
36034 }
36035
36036 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
36037 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
36038 if matches!(target, DialectType::BigQuery) {
36039 // BQ->BQ: always output with parens (function form), keep any timezone arg
36040 Ok(Expression::Function(Box::new(Function::new(name, args))))
36041 } else if name == "CURRENT_DATE" && args.len() == 1 {
36042 // CURRENT_DATE('UTC') - has timezone arg
36043 let tz_arg = args.remove(0);
36044 match target {
36045 DialectType::DuckDB => {
36046 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
36047 let ct = Expression::CurrentTimestamp(
36048 crate::expressions::CurrentTimestamp {
36049 precision: None,
36050 sysdate: false,
36051 },
36052 );
36053 let at_tz =
36054 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36055 this: ct,
36056 zone: tz_arg,
36057 }));
36058 Ok(Expression::Cast(Box::new(Cast {
36059 this: at_tz,
36060 to: DataType::Date,
36061 trailing_comments: vec![],
36062 double_colon_syntax: false,
36063 format: None,
36064 default: None,
36065 inferred_type: None,
36066 })))
36067 }
36068 DialectType::Snowflake => {
36069 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
36070 let ct = Expression::Function(Box::new(Function::new(
36071 "CURRENT_TIMESTAMP".to_string(),
36072 vec![],
36073 )));
36074 let convert = Expression::Function(Box::new(Function::new(
36075 "CONVERT_TIMEZONE".to_string(),
36076 vec![tz_arg, ct],
36077 )));
36078 Ok(Expression::Cast(Box::new(Cast {
36079 this: convert,
36080 to: DataType::Date,
36081 trailing_comments: vec![],
36082 double_colon_syntax: false,
36083 format: None,
36084 default: None,
36085 inferred_type: None,
36086 })))
36087 }
36088 _ => {
36089 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
36090 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
36091 Ok(Expression::AtTimeZone(Box::new(
36092 crate::expressions::AtTimeZone {
36093 this: cd,
36094 zone: tz_arg,
36095 },
36096 )))
36097 }
36098 }
36099 } else if (name == "CURRENT_TIMESTAMP"
36100 || name == "CURRENT_TIME"
36101 || name == "CURRENT_DATE")
36102 && args.is_empty()
36103 && matches!(
36104 target,
36105 DialectType::PostgreSQL
36106 | DialectType::DuckDB
36107 | DialectType::Presto
36108 | DialectType::Trino
36109 )
36110 {
36111 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
36112 if name == "CURRENT_TIMESTAMP" {
36113 Ok(Expression::CurrentTimestamp(
36114 crate::expressions::CurrentTimestamp {
36115 precision: None,
36116 sysdate: false,
36117 },
36118 ))
36119 } else if name == "CURRENT_DATE" {
36120 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
36121 } else {
36122 // CURRENT_TIME
36123 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
36124 precision: None,
36125 }))
36126 }
36127 } else {
36128 // All other targets: keep as function (with parens)
36129 Ok(Expression::Function(Box::new(Function::new(name, args))))
36130 }
36131 }
36132
36133 // JSON_QUERY(json, path) -> target-specific
36134 "JSON_QUERY" if args.len() == 2 => {
36135 match target {
36136 DialectType::DuckDB | DialectType::SQLite => {
36137 // json -> path syntax
36138 let json_expr = args.remove(0);
36139 let path = args.remove(0);
36140 Ok(Expression::JsonExtract(Box::new(
36141 crate::expressions::JsonExtractFunc {
36142 this: json_expr,
36143 path,
36144 returning: None,
36145 arrow_syntax: true,
36146 hash_arrow_syntax: false,
36147 wrapper_option: None,
36148 quotes_option: None,
36149 on_scalar_string: false,
36150 on_error: None,
36151 },
36152 )))
36153 }
36154 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
36155 Ok(Expression::Function(Box::new(Function::new(
36156 "GET_JSON_OBJECT".to_string(),
36157 args,
36158 ))))
36159 }
36160 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
36161 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
36162 )),
36163 _ => Ok(Expression::Function(Box::new(Function::new(
36164 "JSON_QUERY".to_string(),
36165 args,
36166 )))),
36167 }
36168 }
36169
36170 // JSON_VALUE_ARRAY(json, path) -> target-specific
36171 "JSON_VALUE_ARRAY" if args.len() == 2 => {
36172 match target {
36173 DialectType::DuckDB => {
36174 // CAST(json -> path AS TEXT[])
36175 let json_expr = args.remove(0);
36176 let path = args.remove(0);
36177 let arrow = Expression::JsonExtract(Box::new(
36178 crate::expressions::JsonExtractFunc {
36179 this: json_expr,
36180 path,
36181 returning: None,
36182 arrow_syntax: true,
36183 hash_arrow_syntax: false,
36184 wrapper_option: None,
36185 quotes_option: None,
36186 on_scalar_string: false,
36187 on_error: None,
36188 },
36189 ));
36190 Ok(Expression::Cast(Box::new(Cast {
36191 this: arrow,
36192 to: DataType::Array {
36193 element_type: Box::new(DataType::Text),
36194 dimension: None,
36195 },
36196 trailing_comments: vec![],
36197 double_colon_syntax: false,
36198 format: None,
36199 default: None,
36200 inferred_type: None,
36201 })))
36202 }
36203 DialectType::Snowflake => {
36204 let json_expr = args.remove(0);
36205 let path_expr = args.remove(0);
36206 // Convert JSON path from $.path to just path
36207 let sf_path = if let Expression::Literal(ref lit) = path_expr {
36208 if let Literal::String(ref s) = lit.as_ref() {
36209 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
36210 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
36211 } else {
36212 path_expr.clone()
36213 }
36214 } else {
36215 path_expr
36216 };
36217 let parse_json = Expression::Function(Box::new(Function::new(
36218 "PARSE_JSON".to_string(),
36219 vec![json_expr],
36220 )));
36221 let get_path = Expression::Function(Box::new(Function::new(
36222 "GET_PATH".to_string(),
36223 vec![parse_json, sf_path],
36224 )));
36225 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
36226 let cast_expr = Expression::Cast(Box::new(Cast {
36227 this: Expression::Identifier(Identifier::new("x")),
36228 to: DataType::VarChar {
36229 length: None,
36230 parenthesized_length: false,
36231 },
36232 trailing_comments: vec![],
36233 double_colon_syntax: false,
36234 format: None,
36235 default: None,
36236 inferred_type: None,
36237 }));
36238 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
36239 parameters: vec![Identifier::new("x")],
36240 body: cast_expr,
36241 colon: false,
36242 parameter_types: vec![],
36243 }));
36244 Ok(Expression::Function(Box::new(Function::new(
36245 "TRANSFORM".to_string(),
36246 vec![get_path, lambda],
36247 ))))
36248 }
36249 _ => Ok(Expression::Function(Box::new(Function::new(
36250 "JSON_VALUE_ARRAY".to_string(),
36251 args,
36252 )))),
36253 }
36254 }
36255
36256 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
36257 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
36258 // This is different from Hive/Spark where 3rd arg is "group_index"
36259 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
36260 match target {
36261 DialectType::DuckDB
36262 | DialectType::Presto
36263 | DialectType::Trino
36264 | DialectType::Athena => {
36265 if args.len() == 2 {
36266 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
36267 args.push(Expression::number(1));
36268 Ok(Expression::Function(Box::new(Function::new(
36269 "REGEXP_EXTRACT".to_string(),
36270 args,
36271 ))))
36272 } else if args.len() == 3 {
36273 let val = args.remove(0);
36274 let regex = args.remove(0);
36275 let position = args.remove(0);
36276 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36277 if is_pos_1 {
36278 Ok(Expression::Function(Box::new(Function::new(
36279 "REGEXP_EXTRACT".to_string(),
36280 vec![val, regex, Expression::number(1)],
36281 ))))
36282 } else {
36283 let substring_expr = Expression::Function(Box::new(Function::new(
36284 "SUBSTRING".to_string(),
36285 vec![val, position],
36286 )));
36287 let nullif_expr = Expression::Function(Box::new(Function::new(
36288 "NULLIF".to_string(),
36289 vec![
36290 substring_expr,
36291 Expression::Literal(Box::new(Literal::String(
36292 String::new(),
36293 ))),
36294 ],
36295 )));
36296 Ok(Expression::Function(Box::new(Function::new(
36297 "REGEXP_EXTRACT".to_string(),
36298 vec![nullif_expr, regex, Expression::number(1)],
36299 ))))
36300 }
36301 } else if args.len() == 4 {
36302 let val = args.remove(0);
36303 let regex = args.remove(0);
36304 let position = args.remove(0);
36305 let occurrence = args.remove(0);
36306 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36307 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36308 if is_pos_1 && is_occ_1 {
36309 Ok(Expression::Function(Box::new(Function::new(
36310 "REGEXP_EXTRACT".to_string(),
36311 vec![val, regex, Expression::number(1)],
36312 ))))
36313 } else {
36314 let subject = if is_pos_1 {
36315 val
36316 } else {
36317 let substring_expr = Expression::Function(Box::new(
36318 Function::new("SUBSTRING".to_string(), vec![val, position]),
36319 ));
36320 Expression::Function(Box::new(Function::new(
36321 "NULLIF".to_string(),
36322 vec![
36323 substring_expr,
36324 Expression::Literal(Box::new(Literal::String(
36325 String::new(),
36326 ))),
36327 ],
36328 )))
36329 };
36330 let extract_all = Expression::Function(Box::new(Function::new(
36331 "REGEXP_EXTRACT_ALL".to_string(),
36332 vec![subject, regex, Expression::number(1)],
36333 )));
36334 Ok(Expression::Function(Box::new(Function::new(
36335 "ARRAY_EXTRACT".to_string(),
36336 vec![extract_all, occurrence],
36337 ))))
36338 }
36339 } else {
36340 Ok(Expression::Function(Box::new(Function {
36341 name: f.name,
36342 args,
36343 distinct: f.distinct,
36344 trailing_comments: f.trailing_comments,
36345 use_bracket_syntax: f.use_bracket_syntax,
36346 no_parens: f.no_parens,
36347 quoted: f.quoted,
36348 span: None,
36349 inferred_type: None,
36350 })))
36351 }
36352 }
36353 DialectType::Snowflake => {
36354 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
36355 Ok(Expression::Function(Box::new(Function::new(
36356 "REGEXP_SUBSTR".to_string(),
36357 args,
36358 ))))
36359 }
36360 _ => {
36361 // For other targets (Hive/Spark/BigQuery): pass through as-is
36362 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
36363 Ok(Expression::Function(Box::new(Function {
36364 name: f.name,
36365 args,
36366 distinct: f.distinct,
36367 trailing_comments: f.trailing_comments,
36368 use_bracket_syntax: f.use_bracket_syntax,
36369 no_parens: f.no_parens,
36370 quoted: f.quoted,
36371 span: None,
36372 inferred_type: None,
36373 })))
36374 }
36375 }
36376 }
36377
36378 // BigQuery STRUCT(args) -> target-specific struct expression
36379 "STRUCT" => {
36380 // Convert Function args to Struct fields
36381 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
36382 for (i, arg) in args.into_iter().enumerate() {
36383 match arg {
36384 Expression::Alias(a) => {
36385 // Named field: expr AS name
36386 fields.push((Some(a.alias.name.clone()), a.this));
36387 }
36388 other => {
36389 // Unnamed field: for Spark/Hive, keep as None
36390 // For Snowflake, auto-name as _N
36391 // For DuckDB, use column name for column refs, _N for others
36392 if matches!(target, DialectType::Snowflake) {
36393 fields.push((Some(format!("_{}", i)), other));
36394 } else if matches!(target, DialectType::DuckDB) {
36395 let auto_name = match &other {
36396 Expression::Column(col) => col.name.name.clone(),
36397 _ => format!("_{}", i),
36398 };
36399 fields.push((Some(auto_name), other));
36400 } else {
36401 fields.push((None, other));
36402 }
36403 }
36404 }
36405 }
36406
36407 match target {
36408 DialectType::Snowflake => {
36409 // OBJECT_CONSTRUCT('name', value, ...)
36410 let mut oc_args = Vec::new();
36411 for (name, val) in &fields {
36412 if let Some(n) = name {
36413 oc_args.push(Expression::Literal(Box::new(Literal::String(
36414 n.clone(),
36415 ))));
36416 oc_args.push(val.clone());
36417 } else {
36418 oc_args.push(val.clone());
36419 }
36420 }
36421 Ok(Expression::Function(Box::new(Function::new(
36422 "OBJECT_CONSTRUCT".to_string(),
36423 oc_args,
36424 ))))
36425 }
36426 DialectType::DuckDB => {
36427 // {'name': value, ...}
36428 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36429 fields,
36430 })))
36431 }
36432 DialectType::Hive => {
36433 // STRUCT(val1, val2, ...) - strip aliases
36434 let hive_fields: Vec<(Option<String>, Expression)> =
36435 fields.into_iter().map(|(_, v)| (None, v)).collect();
36436 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36437 fields: hive_fields,
36438 })))
36439 }
36440 DialectType::Spark | DialectType::Databricks => {
36441 // Use Expression::Struct to bypass Spark target transform auto-naming
36442 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36443 fields,
36444 })))
36445 }
36446 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
36447 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
36448 let all_named =
36449 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
36450 let all_types_inferable = all_named
36451 && fields
36452 .iter()
36453 .all(|(_, val)| Self::can_infer_presto_type(val));
36454 let row_args: Vec<Expression> =
36455 fields.iter().map(|(_, v)| v.clone()).collect();
36456 let row_expr = Expression::Function(Box::new(Function::new(
36457 "ROW".to_string(),
36458 row_args,
36459 )));
36460 if all_named && all_types_inferable {
36461 // Build ROW type with inferred types
36462 let mut row_type_fields = Vec::new();
36463 for (name, val) in &fields {
36464 if let Some(n) = name {
36465 let type_str = Self::infer_sql_type_for_presto(val);
36466 row_type_fields.push(crate::expressions::StructField::new(
36467 n.clone(),
36468 crate::expressions::DataType::Custom { name: type_str },
36469 ));
36470 }
36471 }
36472 let row_type = crate::expressions::DataType::Struct {
36473 fields: row_type_fields,
36474 nested: true,
36475 };
36476 Ok(Expression::Cast(Box::new(Cast {
36477 this: row_expr,
36478 to: row_type,
36479 trailing_comments: Vec::new(),
36480 double_colon_syntax: false,
36481 format: None,
36482 default: None,
36483 inferred_type: None,
36484 })))
36485 } else {
36486 Ok(row_expr)
36487 }
36488 }
36489 _ => {
36490 // Default: keep as STRUCT function with original args
36491 let mut new_args = Vec::new();
36492 for (name, val) in fields {
36493 if let Some(n) = name {
36494 new_args.push(Expression::Alias(Box::new(
36495 crate::expressions::Alias::new(val, Identifier::new(n)),
36496 )));
36497 } else {
36498 new_args.push(val);
36499 }
36500 }
36501 Ok(Expression::Function(Box::new(Function::new(
36502 "STRUCT".to_string(),
36503 new_args,
36504 ))))
36505 }
36506 }
36507 }
36508
36509 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
36510 "ROUND" if args.len() == 3 => {
36511 let x = args.remove(0);
36512 let n = args.remove(0);
36513 let mode = args.remove(0);
36514 // Check if mode is 'ROUND_HALF_EVEN'
36515 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
36516 if is_half_even && matches!(target, DialectType::DuckDB) {
36517 Ok(Expression::Function(Box::new(Function::new(
36518 "ROUND_EVEN".to_string(),
36519 vec![x, n],
36520 ))))
36521 } else {
36522 // Pass through with all args
36523 Ok(Expression::Function(Box::new(Function::new(
36524 "ROUND".to_string(),
36525 vec![x, n, mode],
36526 ))))
36527 }
36528 }
36529
36530 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
36531 "MAKE_INTERVAL" => {
36532 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
36533 // The positional args are: year, month
36534 // Named args are: day =>, minute =>, etc.
36535 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
36536 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
36537 // For BigQuery->BigQuery: reorder named args (day before minute)
36538 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
36539 let mut parts: Vec<(String, String)> = Vec::new();
36540 let mut pos_idx = 0;
36541 let pos_units = ["year", "month"];
36542 for arg in &args {
36543 if let Expression::NamedArgument(na) = arg {
36544 // Named arg like minute => 5
36545 let unit = na.name.name.clone();
36546 if let Expression::Literal(lit) = &na.value {
36547 if let Literal::Number(n) = lit.as_ref() {
36548 parts.push((unit, n.clone()));
36549 }
36550 }
36551 } else if pos_idx < pos_units.len() {
36552 if let Expression::Literal(lit) = arg {
36553 if let Literal::Number(n) = lit.as_ref() {
36554 parts.push((pos_units[pos_idx].to_string(), n.clone()));
36555 }
36556 }
36557 pos_idx += 1;
36558 }
36559 }
36560 // Don't sort - preserve original argument order
36561 let separator = if matches!(target, DialectType::Snowflake) {
36562 ", "
36563 } else {
36564 " "
36565 };
36566 let interval_str = parts
36567 .iter()
36568 .map(|(u, v)| format!("{} {}", v, u))
36569 .collect::<Vec<_>>()
36570 .join(separator);
36571 Ok(Expression::Interval(Box::new(
36572 crate::expressions::Interval {
36573 this: Some(Expression::Literal(Box::new(Literal::String(
36574 interval_str,
36575 )))),
36576 unit: None,
36577 },
36578 )))
36579 } else if matches!(target, DialectType::BigQuery) {
36580 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
36581 let mut positional = Vec::new();
36582 let mut named: Vec<(
36583 String,
36584 Expression,
36585 crate::expressions::NamedArgSeparator,
36586 )> = Vec::new();
36587 let _pos_units = ["year", "month"];
36588 let mut _pos_idx = 0;
36589 for arg in args {
36590 if let Expression::NamedArgument(na) = arg {
36591 named.push((na.name.name.clone(), na.value, na.separator));
36592 } else {
36593 positional.push(arg);
36594 _pos_idx += 1;
36595 }
36596 }
36597 // Sort named args by: day, hour, minute, second
36598 let unit_order = |u: &str| -> usize {
36599 match u.to_ascii_lowercase().as_str() {
36600 "day" => 0,
36601 "hour" => 1,
36602 "minute" => 2,
36603 "second" => 3,
36604 _ => 4,
36605 }
36606 };
36607 named.sort_by_key(|(u, _, _)| unit_order(u));
36608 let mut result_args = positional;
36609 for (name, value, sep) in named {
36610 result_args.push(Expression::NamedArgument(Box::new(
36611 crate::expressions::NamedArgument {
36612 name: Identifier::new(&name),
36613 value,
36614 separator: sep,
36615 },
36616 )));
36617 }
36618 Ok(Expression::Function(Box::new(Function::new(
36619 "MAKE_INTERVAL".to_string(),
36620 result_args,
36621 ))))
36622 } else {
36623 Ok(Expression::Function(Box::new(Function::new(
36624 "MAKE_INTERVAL".to_string(),
36625 args,
36626 ))))
36627 }
36628 }
36629
36630 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
36631 "ARRAY_TO_STRING" if args.len() == 3 => {
36632 let arr = args.remove(0);
36633 let sep = args.remove(0);
36634 let null_text = args.remove(0);
36635 match target {
36636 DialectType::DuckDB => {
36637 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
36638 let _lambda_param =
36639 Expression::Identifier(crate::expressions::Identifier::new("x"));
36640 let coalesce =
36641 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
36642 original_name: None,
36643 expressions: vec![
36644 Expression::Identifier(crate::expressions::Identifier::new(
36645 "x",
36646 )),
36647 null_text,
36648 ],
36649 inferred_type: None,
36650 }));
36651 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
36652 parameters: vec![crate::expressions::Identifier::new("x")],
36653 body: coalesce,
36654 colon: false,
36655 parameter_types: vec![],
36656 }));
36657 let list_transform = Expression::Function(Box::new(Function::new(
36658 "LIST_TRANSFORM".to_string(),
36659 vec![arr, lambda],
36660 )));
36661 Ok(Expression::Function(Box::new(Function::new(
36662 "ARRAY_TO_STRING".to_string(),
36663 vec![list_transform, sep],
36664 ))))
36665 }
36666 _ => Ok(Expression::Function(Box::new(Function::new(
36667 "ARRAY_TO_STRING".to_string(),
36668 vec![arr, sep, null_text],
36669 )))),
36670 }
36671 }
36672
36673 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
36674 "LENGTH" if args.len() == 1 => {
36675 let arg = args.remove(0);
36676 match target {
36677 DialectType::DuckDB => {
36678 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
36679 let typeof_func = Expression::Function(Box::new(Function::new(
36680 "TYPEOF".to_string(),
36681 vec![arg.clone()],
36682 )));
36683 let blob_cast = Expression::Cast(Box::new(Cast {
36684 this: arg.clone(),
36685 to: DataType::VarBinary { length: None },
36686 trailing_comments: vec![],
36687 double_colon_syntax: false,
36688 format: None,
36689 default: None,
36690 inferred_type: None,
36691 }));
36692 let octet_length = Expression::Function(Box::new(Function::new(
36693 "OCTET_LENGTH".to_string(),
36694 vec![blob_cast],
36695 )));
36696 let text_cast = Expression::Cast(Box::new(Cast {
36697 this: arg,
36698 to: DataType::Text,
36699 trailing_comments: vec![],
36700 double_colon_syntax: false,
36701 format: None,
36702 default: None,
36703 inferred_type: None,
36704 }));
36705 let length_text = Expression::Function(Box::new(Function::new(
36706 "LENGTH".to_string(),
36707 vec![text_cast],
36708 )));
36709 Ok(Expression::Case(Box::new(crate::expressions::Case {
36710 operand: Some(typeof_func),
36711 whens: vec![(
36712 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
36713 octet_length,
36714 )],
36715 else_: Some(length_text),
36716 comments: Vec::new(),
36717 inferred_type: None,
36718 })))
36719 }
36720 _ => Ok(Expression::Function(Box::new(Function::new(
36721 "LENGTH".to_string(),
36722 vec![arg],
36723 )))),
36724 }
36725 }
36726
36727 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
36728 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
36729 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
36730 // The args should be [x, fraction] with the null handling stripped
36731 // For DuckDB: QUANTILE_CONT(x, fraction)
36732 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
36733 match target {
36734 DialectType::DuckDB => {
36735 // Strip down to just 2 args, rename to QUANTILE_CONT
36736 let x = args[0].clone();
36737 let frac = args[1].clone();
36738 Ok(Expression::Function(Box::new(Function::new(
36739 "QUANTILE_CONT".to_string(),
36740 vec![x, frac],
36741 ))))
36742 }
36743 _ => Ok(Expression::Function(Box::new(Function::new(
36744 "PERCENTILE_CONT".to_string(),
36745 args,
36746 )))),
36747 }
36748 }
36749
36750 // All others: pass through
36751 _ => Ok(Expression::Function(Box::new(Function {
36752 name: f.name,
36753 args,
36754 distinct: f.distinct,
36755 trailing_comments: f.trailing_comments,
36756 use_bracket_syntax: f.use_bracket_syntax,
36757 no_parens: f.no_parens,
36758 quoted: f.quoted,
36759 span: None,
36760 inferred_type: None,
36761 }))),
36762 }
36763 }
36764
36765 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
36766 /// Returns false for column references and other non-literal expressions where the type is unknown.
36767 fn can_infer_presto_type(expr: &Expression) -> bool {
36768 match expr {
36769 Expression::Literal(_) => true,
36770 Expression::Boolean(_) => true,
36771 Expression::Array(_) | Expression::ArrayFunc(_) => true,
36772 Expression::Struct(_) | Expression::StructFunc(_) => true,
36773 Expression::Function(f) => {
36774 f.name.eq_ignore_ascii_case("STRUCT")
36775 || f.name.eq_ignore_ascii_case("ROW")
36776 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
36777 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36778 || f.name.eq_ignore_ascii_case("NOW")
36779 }
36780 Expression::Cast(_) => true,
36781 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
36782 _ => false,
36783 }
36784 }
36785
36786 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
36787 fn infer_sql_type_for_presto(expr: &Expression) -> String {
36788 use crate::expressions::Literal;
36789 match expr {
36790 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36791 "VARCHAR".to_string()
36792 }
36793 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36794 let Literal::Number(n) = lit.as_ref() else {
36795 unreachable!()
36796 };
36797 if n.contains('.') {
36798 "DOUBLE".to_string()
36799 } else {
36800 "INTEGER".to_string()
36801 }
36802 }
36803 Expression::Boolean(_) => "BOOLEAN".to_string(),
36804 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36805 "DATE".to_string()
36806 }
36807 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36808 "TIMESTAMP".to_string()
36809 }
36810 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
36811 "TIMESTAMP".to_string()
36812 }
36813 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
36814 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
36815 Expression::Function(f) => {
36816 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
36817 "ROW".to_string()
36818 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
36819 "DATE".to_string()
36820 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36821 || f.name.eq_ignore_ascii_case("NOW")
36822 {
36823 "TIMESTAMP".to_string()
36824 } else {
36825 "VARCHAR".to_string()
36826 }
36827 }
36828 Expression::Cast(c) => {
36829 // If already cast, use the target type
36830 Self::data_type_to_presto_string(&c.to)
36831 }
36832 _ => "VARCHAR".to_string(),
36833 }
36834 }
36835
36836 /// Convert a DataType to its Presto/Trino string representation for ROW type
36837 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
36838 use crate::expressions::DataType;
36839 match dt {
36840 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
36841 "VARCHAR".to_string()
36842 }
36843 DataType::Int { .. }
36844 | DataType::BigInt { .. }
36845 | DataType::SmallInt { .. }
36846 | DataType::TinyInt { .. } => "INTEGER".to_string(),
36847 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
36848 DataType::Boolean => "BOOLEAN".to_string(),
36849 DataType::Date => "DATE".to_string(),
36850 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
36851 DataType::Struct { fields, .. } => {
36852 let field_strs: Vec<String> = fields
36853 .iter()
36854 .map(|f| {
36855 format!(
36856 "{} {}",
36857 f.name,
36858 Self::data_type_to_presto_string(&f.data_type)
36859 )
36860 })
36861 .collect();
36862 format!("ROW({})", field_strs.join(", "))
36863 }
36864 DataType::Array { element_type, .. } => {
36865 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
36866 }
36867 DataType::Custom { name } => {
36868 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
36869 name.clone()
36870 }
36871 _ => "VARCHAR".to_string(),
36872 }
36873 }
36874
36875 /// Convert IntervalUnit to string
36876 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
36877 match unit {
36878 crate::expressions::IntervalUnit::Year => "YEAR",
36879 crate::expressions::IntervalUnit::Quarter => "QUARTER",
36880 crate::expressions::IntervalUnit::Month => "MONTH",
36881 crate::expressions::IntervalUnit::Week => "WEEK",
36882 crate::expressions::IntervalUnit::Day => "DAY",
36883 crate::expressions::IntervalUnit::Hour => "HOUR",
36884 crate::expressions::IntervalUnit::Minute => "MINUTE",
36885 crate::expressions::IntervalUnit::Second => "SECOND",
36886 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
36887 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
36888 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
36889 }
36890 }
36891
36892 /// Extract unit string from an expression (uppercased)
36893 fn get_unit_str_static(expr: &Expression) -> String {
36894 use crate::expressions::Literal;
36895 match expr {
36896 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
36897 Expression::Var(v) => v.this.to_ascii_uppercase(),
36898 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36899 let Literal::String(s) = lit.as_ref() else {
36900 unreachable!()
36901 };
36902 s.to_ascii_uppercase()
36903 }
36904 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
36905 Expression::Function(f) => {
36906 let base = f.name.to_ascii_uppercase();
36907 if !f.args.is_empty() {
36908 let inner = Self::get_unit_str_static(&f.args[0]);
36909 format!("{}({})", base, inner)
36910 } else {
36911 base
36912 }
36913 }
36914 _ => "DAY".to_string(),
36915 }
36916 }
36917
36918 /// Parse unit string to IntervalUnit
36919 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
36920 match s {
36921 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
36922 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
36923 "MONTH" | "MONTHS" | "MON" | "MONS" | "MM" | "M" => {
36924 crate::expressions::IntervalUnit::Month
36925 }
36926 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
36927 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
36928 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
36929 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
36930 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
36931 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
36932 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
36933 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
36934 _ => crate::expressions::IntervalUnit::Day,
36935 }
36936 }
36937
36938 /// Convert expression to simple string for interval building
36939 fn expr_to_string_static(expr: &Expression) -> String {
36940 use crate::expressions::Literal;
36941 match expr {
36942 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36943 let Literal::Number(s) = lit.as_ref() else {
36944 unreachable!()
36945 };
36946 s.clone()
36947 }
36948 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36949 let Literal::String(s) = lit.as_ref() else {
36950 unreachable!()
36951 };
36952 s.clone()
36953 }
36954 Expression::Identifier(id) => id.name.clone(),
36955 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
36956 _ => "1".to_string(),
36957 }
36958 }
36959
36960 /// Extract a simple string representation from a literal expression
36961 fn expr_to_string(expr: &Expression) -> String {
36962 use crate::expressions::Literal;
36963 match expr {
36964 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36965 let Literal::Number(s) = lit.as_ref() else {
36966 unreachable!()
36967 };
36968 s.clone()
36969 }
36970 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36971 let Literal::String(s) = lit.as_ref() else {
36972 unreachable!()
36973 };
36974 s.clone()
36975 }
36976 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
36977 Expression::Identifier(id) => id.name.clone(),
36978 _ => "1".to_string(),
36979 }
36980 }
36981
36982 /// Quote an interval value expression as a string literal if it's a number (or negated number)
36983 fn quote_interval_val(expr: &Expression) -> Expression {
36984 use crate::expressions::Literal;
36985 match expr {
36986 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36987 let Literal::Number(n) = lit.as_ref() else {
36988 unreachable!()
36989 };
36990 Expression::Literal(Box::new(Literal::String(n.clone())))
36991 }
36992 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
36993 Expression::Neg(inner) => {
36994 if let Expression::Literal(lit) = &inner.this {
36995 if let Literal::Number(n) = lit.as_ref() {
36996 Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
36997 } else {
36998 inner.this.clone()
36999 }
37000 } else {
37001 expr.clone()
37002 }
37003 }
37004 _ => expr.clone(),
37005 }
37006 }
37007
/// Check whether a timestamp string carries timezone information.
///
/// Surrounding whitespace is ignored. Two forms are recognised:
/// - a numeric offset as the last space-separated token: a `+` or `-` followed
///   by digits and optional colons, with at least one digit (`+2`, `-05:00`);
/// - one of a fixed set of trailing abbreviations, matched case-insensitively
///   and preceded by a space (`UTC`, `GMT`, `CET`, `EST`, `PST`, `CST`, `MST`).
///
/// Returns `false` for anything else, including the empty string.
fn timestamp_string_has_timezone(ts: &str) -> bool {
    const TZ_ABBREVS: [&str; 7] = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];

    let trimmed = ts.trim();

    // Numeric offset: inspect the token after the last space.
    if let Some((_, suffix)) = trimmed.rsplit_once(' ') {
        if let Some(rest) = suffix.strip_prefix(|c: char| c == '+' || c == '-') {
            let only_offset_chars = rest.chars().all(|c| c.is_ascii_digit() || c == ':');
            // Require a real digit so a token such as "+:" is not taken for an offset.
            let has_digit = rest.chars().any(|c| c.is_ascii_digit());
            if only_offset_chars && has_digit {
                return true;
            }
        }
    }

    // Named timezone abbreviation at the end of the string.
    let lowered = trimmed.to_ascii_lowercase();
    TZ_ABBREVS.iter().any(|abbrev| lowered.ends_with(*abbrev))
}
37031
37032 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
37033 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
37034 use crate::expressions::{Cast, DataType, Literal};
37035 match expr {
37036 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37037 let Literal::Timestamp(s) = lit.as_ref() else {
37038 unreachable!()
37039 };
37040 let tz = func_name.starts_with("TIMESTAMP");
37041 Expression::Cast(Box::new(Cast {
37042 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37043 to: if tz {
37044 DataType::Timestamp {
37045 timezone: true,
37046 precision: None,
37047 }
37048 } else {
37049 DataType::Timestamp {
37050 timezone: false,
37051 precision: None,
37052 }
37053 },
37054 trailing_comments: vec![],
37055 double_colon_syntax: false,
37056 format: None,
37057 default: None,
37058 inferred_type: None,
37059 }))
37060 }
37061 other => other,
37062 }
37063 }
37064
37065 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
37066 fn maybe_cast_ts(expr: Expression) -> Expression {
37067 use crate::expressions::{Cast, DataType, Literal};
37068 match expr {
37069 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37070 let Literal::Timestamp(s) = lit.as_ref() else {
37071 unreachable!()
37072 };
37073 Expression::Cast(Box::new(Cast {
37074 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37075 to: DataType::Timestamp {
37076 timezone: false,
37077 precision: None,
37078 },
37079 trailing_comments: vec![],
37080 double_colon_syntax: false,
37081 format: None,
37082 default: None,
37083 inferred_type: None,
37084 }))
37085 }
37086 other => other,
37087 }
37088 }
37089
37090 /// Convert DATE 'x' literal to CAST('x' AS DATE)
37091 fn date_literal_to_cast(expr: Expression) -> Expression {
37092 use crate::expressions::{Cast, DataType, Literal};
37093 match expr {
37094 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
37095 let Literal::Date(s) = lit.as_ref() else {
37096 unreachable!()
37097 };
37098 Expression::Cast(Box::new(Cast {
37099 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37100 to: DataType::Date,
37101 trailing_comments: vec![],
37102 double_colon_syntax: false,
37103 format: None,
37104 default: None,
37105 inferred_type: None,
37106 }))
37107 }
37108 other => other,
37109 }
37110 }
37111
37112 /// Ensure an expression that should be a date is CAST(... AS DATE).
37113 /// Handles both DATE literals and string literals that look like dates.
37114 fn ensure_cast_date(expr: Expression) -> Expression {
37115 use crate::expressions::{Cast, DataType, Literal};
37116 match expr {
37117 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
37118 let Literal::Date(s) = lit.as_ref() else {
37119 unreachable!()
37120 };
37121 Expression::Cast(Box::new(Cast {
37122 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37123 to: DataType::Date,
37124 trailing_comments: vec![],
37125 double_colon_syntax: false,
37126 format: None,
37127 default: None,
37128 inferred_type: None,
37129 }))
37130 }
37131 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37132 // String literal that should be a date -> CAST('s' AS DATE)
37133 Expression::Cast(Box::new(Cast {
37134 this: expr,
37135 to: DataType::Date,
37136 trailing_comments: vec![],
37137 double_colon_syntax: false,
37138 format: None,
37139 default: None,
37140 inferred_type: None,
37141 }))
37142 }
37143 // Already a CAST or other expression -> leave as-is
37144 other => other,
37145 }
37146 }
37147
37148 /// Force CAST(expr AS DATE) for any expression (not just literals)
37149 /// Skips if the expression is already a CAST to DATE
37150 fn force_cast_date(expr: Expression) -> Expression {
37151 use crate::expressions::{Cast, DataType};
37152 // If it's already a CAST to DATE, don't double-wrap
37153 if let Expression::Cast(ref c) = expr {
37154 if matches!(c.to, DataType::Date) {
37155 return expr;
37156 }
37157 }
37158 Expression::Cast(Box::new(Cast {
37159 this: expr,
37160 to: DataType::Date,
37161 trailing_comments: vec![],
37162 double_colon_syntax: false,
37163 format: None,
37164 default: None,
37165 inferred_type: None,
37166 }))
37167 }
37168
/// Name of the internal stand-in for `TO_DATE`.
///
/// `_POLYGLOT_TO_DATE` is not recognized by the TO_DATE -> CAST logic, so the
/// Snowflake handler will not convert it to a CAST. The Snowflake DATEDIFF
/// handler converts these calls back to `TO_DATE`.
const PRESERVED_TO_DATE: &str = "_POLYGLOT_TO_DATE";
37173
37174 fn ensure_to_date_preserved(expr: Expression) -> Expression {
37175 use crate::expressions::{Function, Literal};
37176 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
37177 {
37178 Expression::Function(Box::new(Function::new(
37179 Self::PRESERVED_TO_DATE.to_string(),
37180 vec![expr],
37181 )))
37182 } else {
37183 expr
37184 }
37185 }
37186
37187 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
37188 fn try_cast_date(expr: Expression) -> Expression {
37189 use crate::expressions::{Cast, DataType};
37190 Expression::TryCast(Box::new(Cast {
37191 this: expr,
37192 to: DataType::Date,
37193 trailing_comments: vec![],
37194 double_colon_syntax: false,
37195 format: None,
37196 default: None,
37197 inferred_type: None,
37198 }))
37199 }
37200
37201 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
37202 fn double_cast_timestamp_date(expr: Expression) -> Expression {
37203 use crate::expressions::{Cast, DataType};
37204 let inner = Expression::Cast(Box::new(Cast {
37205 this: expr,
37206 to: DataType::Timestamp {
37207 timezone: false,
37208 precision: None,
37209 },
37210 trailing_comments: vec![],
37211 double_colon_syntax: false,
37212 format: None,
37213 default: None,
37214 inferred_type: None,
37215 }));
37216 Expression::Cast(Box::new(Cast {
37217 this: inner,
37218 to: DataType::Date,
37219 trailing_comments: vec![],
37220 double_colon_syntax: false,
37221 format: None,
37222 default: None,
37223 inferred_type: None,
37224 }))
37225 }
37226
37227 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
37228 fn double_cast_datetime_date(expr: Expression) -> Expression {
37229 use crate::expressions::{Cast, DataType};
37230 let inner = Expression::Cast(Box::new(Cast {
37231 this: expr,
37232 to: DataType::Custom {
37233 name: "DATETIME".to_string(),
37234 },
37235 trailing_comments: vec![],
37236 double_colon_syntax: false,
37237 format: None,
37238 default: None,
37239 inferred_type: None,
37240 }));
37241 Expression::Cast(Box::new(Cast {
37242 this: inner,
37243 to: DataType::Date,
37244 trailing_comments: vec![],
37245 double_colon_syntax: false,
37246 format: None,
37247 default: None,
37248 inferred_type: None,
37249 }))
37250 }
37251
37252 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
37253 fn double_cast_datetime2_date(expr: Expression) -> Expression {
37254 use crate::expressions::{Cast, DataType};
37255 let inner = Expression::Cast(Box::new(Cast {
37256 this: expr,
37257 to: DataType::Custom {
37258 name: "DATETIME2".to_string(),
37259 },
37260 trailing_comments: vec![],
37261 double_colon_syntax: false,
37262 format: None,
37263 default: None,
37264 inferred_type: None,
37265 }));
37266 Expression::Cast(Box::new(Cast {
37267 this: inner,
37268 to: DataType::Date,
37269 trailing_comments: vec![],
37270 double_colon_syntax: false,
37271 format: None,
37272 default: None,
37273 inferred_type: None,
37274 }))
37275 }
37276
/// Convert a Hive/Java-style date format string to a C-style (strftime) format,
/// e.g. `"yyyy-MM-dd'T'HH"` -> `"%Y-%m-%d'T'%H"`.
///
/// A run of the same pattern letter is consumed as one unit and mapped as follows:
/// - `y`: exactly two -> `%y`, any other count -> `%Y`
/// - `M`: three or more -> `%b`, otherwise `%m`
/// - `d` -> `%d`, `H` -> `%H`, `h` -> `%I`, `m` -> `%M`, `s` -> `%S`
/// - `S` (fractional seconds) -> `%f`
/// - `a` (AM/PM) -> `%p`
/// - `E`: four or more -> `%A`, otherwise `%a`
///
/// Single-quoted literal text is copied through verbatim, quotes included. An
/// unterminated quote copies the rest of the input. All other characters are
/// copied unchanged.
fn hive_format_to_c_format(fmt: &str) -> String {
    let chars: Vec<char> = fmt.chars().collect();
    let mut result = String::with_capacity(fmt.len());
    let mut i = 0;

    while i < chars.len() {
        let c = chars[i];

        if c == '\'' {
            // Quoted literal text: pass through the quotes and their content.
            result.push('\'');
            i += 1;
            while i < chars.len() && chars[i] != '\'' {
                result.push(chars[i]);
                i += 1;
            }
            if i < chars.len() {
                result.push('\'');
                i += 1;
            }
            continue;
        }

        // Length of the run of identical characters starting at `i` (at least 1).
        let run = chars[i..].iter().take_while(|&&next| next == c).count();

        let directive = match c {
            'y' => Some(if run == 2 { "%y" } else { "%Y" }),
            'M' => Some(if run >= 3 { "%b" } else { "%m" }),
            'd' => Some("%d"),
            'H' => Some("%H"),
            'h' => Some("%I"),
            'm' => Some("%M"),
            's' => Some("%S"),
            'S' => Some("%f"),
            'a' => Some("%p"),
            'E' => Some(if run >= 4 { "%A" } else { "%a" }),
            _ => None,
        };

        match directive {
            Some(code) => {
                // The whole run collapses into a single directive.
                result.push_str(code);
                i += run;
            }
            None => {
                result.push(c);
                i += 1;
            }
        }
    }

    result
}
37400
37401 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
37402 fn hive_format_to_presto_format(fmt: &str) -> String {
37403 let c_fmt = Self::hive_format_to_c_format(fmt);
37404 // Presto uses %T for HH:MM:SS
37405 c_fmt.replace("%H:%M:%S", "%T")
37406 }
37407
37408 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
37409 fn ensure_cast_timestamp(expr: Expression) -> Expression {
37410 use crate::expressions::{Cast, DataType, Literal};
37411 match expr {
37412 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37413 let Literal::Timestamp(s) = lit.as_ref() else {
37414 unreachable!()
37415 };
37416 Expression::Cast(Box::new(Cast {
37417 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37418 to: DataType::Timestamp {
37419 timezone: false,
37420 precision: None,
37421 },
37422 trailing_comments: vec![],
37423 double_colon_syntax: false,
37424 format: None,
37425 default: None,
37426 inferred_type: None,
37427 }))
37428 }
37429 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37430 Expression::Cast(Box::new(Cast {
37431 this: expr,
37432 to: DataType::Timestamp {
37433 timezone: false,
37434 precision: None,
37435 },
37436 trailing_comments: vec![],
37437 double_colon_syntax: false,
37438 format: None,
37439 default: None,
37440 inferred_type: None,
37441 }))
37442 }
37443 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
37444 let Literal::Datetime(s) = lit.as_ref() else {
37445 unreachable!()
37446 };
37447 Expression::Cast(Box::new(Cast {
37448 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37449 to: DataType::Timestamp {
37450 timezone: false,
37451 precision: None,
37452 },
37453 trailing_comments: vec![],
37454 double_colon_syntax: false,
37455 format: None,
37456 default: None,
37457 inferred_type: None,
37458 }))
37459 }
37460 other => other,
37461 }
37462 }
37463
37464 /// Force CAST to TIMESTAMP for any expression (not just literals)
37465 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
37466 fn force_cast_timestamp(expr: Expression) -> Expression {
37467 use crate::expressions::{Cast, DataType};
37468 // Don't double-wrap if already a CAST to TIMESTAMP
37469 if let Expression::Cast(ref c) = expr {
37470 if matches!(c.to, DataType::Timestamp { .. }) {
37471 return expr;
37472 }
37473 }
37474 Expression::Cast(Box::new(Cast {
37475 this: expr,
37476 to: DataType::Timestamp {
37477 timezone: false,
37478 precision: None,
37479 },
37480 trailing_comments: vec![],
37481 double_colon_syntax: false,
37482 format: None,
37483 default: None,
37484 inferred_type: None,
37485 }))
37486 }
37487
37488 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
37489 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
37490 use crate::expressions::{Cast, DataType, Literal};
37491 match expr {
37492 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37493 let Literal::Timestamp(s) = lit.as_ref() else {
37494 unreachable!()
37495 };
37496 Expression::Cast(Box::new(Cast {
37497 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37498 to: DataType::Timestamp {
37499 timezone: true,
37500 precision: None,
37501 },
37502 trailing_comments: vec![],
37503 double_colon_syntax: false,
37504 format: None,
37505 default: None,
37506 inferred_type: None,
37507 }))
37508 }
37509 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37510 Expression::Cast(Box::new(Cast {
37511 this: expr,
37512 to: DataType::Timestamp {
37513 timezone: true,
37514 precision: None,
37515 },
37516 trailing_comments: vec![],
37517 double_colon_syntax: false,
37518 format: None,
37519 default: None,
37520 inferred_type: None,
37521 }))
37522 }
37523 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
37524 let Literal::Datetime(s) = lit.as_ref() else {
37525 unreachable!()
37526 };
37527 Expression::Cast(Box::new(Cast {
37528 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37529 to: DataType::Timestamp {
37530 timezone: true,
37531 precision: None,
37532 },
37533 trailing_comments: vec![],
37534 double_colon_syntax: false,
37535 format: None,
37536 default: None,
37537 inferred_type: None,
37538 }))
37539 }
37540 other => other,
37541 }
37542 }
37543
37544 /// Ensure expression is CAST to DATETIME (for BigQuery)
37545 fn ensure_cast_datetime(expr: Expression) -> Expression {
37546 use crate::expressions::{Cast, DataType, Literal};
37547 match expr {
37548 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37549 Expression::Cast(Box::new(Cast {
37550 this: expr,
37551 to: DataType::Custom {
37552 name: "DATETIME".to_string(),
37553 },
37554 trailing_comments: vec![],
37555 double_colon_syntax: false,
37556 format: None,
37557 default: None,
37558 inferred_type: None,
37559 }))
37560 }
37561 other => other,
37562 }
37563 }
37564
37565 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
37566 fn force_cast_datetime(expr: Expression) -> Expression {
37567 use crate::expressions::{Cast, DataType};
37568 if let Expression::Cast(ref c) = expr {
37569 if let DataType::Custom { ref name } = c.to {
37570 if name.eq_ignore_ascii_case("DATETIME") {
37571 return expr;
37572 }
37573 }
37574 }
37575 Expression::Cast(Box::new(Cast {
37576 this: expr,
37577 to: DataType::Custom {
37578 name: "DATETIME".to_string(),
37579 },
37580 trailing_comments: vec![],
37581 double_colon_syntax: false,
37582 format: None,
37583 default: None,
37584 inferred_type: None,
37585 }))
37586 }
37587
37588 /// Ensure expression is CAST to DATETIME2 (for TSQL)
37589 fn ensure_cast_datetime2(expr: Expression) -> Expression {
37590 use crate::expressions::{Cast, DataType, Literal};
37591 match expr {
37592 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37593 Expression::Cast(Box::new(Cast {
37594 this: expr,
37595 to: DataType::Custom {
37596 name: "DATETIME2".to_string(),
37597 },
37598 trailing_comments: vec![],
37599 double_colon_syntax: false,
37600 format: None,
37601 default: None,
37602 inferred_type: None,
37603 }))
37604 }
37605 other => other,
37606 }
37607 }
37608
37609 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
37610 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
37611 use crate::expressions::{Cast, DataType, Literal};
37612 match expr {
37613 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37614 let Literal::Timestamp(s) = lit.as_ref() else {
37615 unreachable!()
37616 };
37617 Expression::Cast(Box::new(Cast {
37618 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37619 to: DataType::Timestamp {
37620 timezone: true,
37621 precision: None,
37622 },
37623 trailing_comments: vec![],
37624 double_colon_syntax: false,
37625 format: None,
37626 default: None,
37627 inferred_type: None,
37628 }))
37629 }
37630 other => other,
37631 }
37632 }
37633
37634 /// Convert BigQuery format string to Snowflake format string
37635 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
37636 use crate::expressions::Literal;
37637 if let Expression::Literal(lit) = format_expr {
37638 if let Literal::String(s) = lit.as_ref() {
37639 let sf = s
37640 .replace("%Y", "yyyy")
37641 .replace("%m", "mm")
37642 .replace("%d", "DD")
37643 .replace("%H", "HH24")
37644 .replace("%M", "MI")
37645 .replace("%S", "SS")
37646 .replace("%b", "mon")
37647 .replace("%B", "Month")
37648 .replace("%e", "FMDD");
37649 Expression::Literal(Box::new(Literal::String(sf)))
37650 } else {
37651 format_expr.clone()
37652 }
37653 } else {
37654 format_expr.clone()
37655 }
37656 }
37657
37658 /// Convert BigQuery format string to DuckDB format string
37659 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
37660 use crate::expressions::Literal;
37661 if let Expression::Literal(lit) = format_expr {
37662 if let Literal::String(s) = lit.as_ref() {
37663 let duck = s
37664 .replace("%T", "%H:%M:%S")
37665 .replace("%F", "%Y-%m-%d")
37666 .replace("%D", "%m/%d/%y")
37667 .replace("%x", "%m/%d/%y")
37668 .replace("%c", "%a %b %-d %H:%M:%S %Y")
37669 .replace("%e", "%-d")
37670 .replace("%E6S", "%S.%f");
37671 Expression::Literal(Box::new(Literal::String(duck)))
37672 } else {
37673 format_expr.clone()
37674 }
37675 } else {
37676 format_expr.clone()
37677 }
37678 }
37679
37680 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
37681 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
37682 use crate::expressions::Literal;
37683 if let Expression::Literal(lit) = format_expr {
37684 if let Literal::String(s) = lit.as_ref() {
37685 // Replace format elements from longest to shortest to avoid partial matches
37686 let result = s
37687 .replace("YYYYMMDD", "%Y%m%d")
37688 .replace("YYYY", "%Y")
37689 .replace("YY", "%y")
37690 .replace("MONTH", "%B")
37691 .replace("MON", "%b")
37692 .replace("MM", "%m")
37693 .replace("DD", "%d")
37694 .replace("HH24", "%H")
37695 .replace("HH12", "%I")
37696 .replace("HH", "%I")
37697 .replace("MI", "%M")
37698 .replace("SSTZH", "%S%z")
37699 .replace("SS", "%S")
37700 .replace("TZH", "%z");
37701 Expression::Literal(Box::new(Literal::String(result)))
37702 } else {
37703 format_expr.clone()
37704 }
37705 } else {
37706 format_expr.clone()
37707 }
37708 }
37709
37710 /// Normalize BigQuery format strings for BQ->BQ output
37711 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
37712 use crate::expressions::Literal;
37713 if let Expression::Literal(lit) = format_expr {
37714 if let Literal::String(s) = lit.as_ref() {
37715 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
37716 Expression::Literal(Box::new(Literal::String(norm)))
37717 } else {
37718 format_expr.clone()
37719 }
37720 } else {
37721 format_expr.clone()
37722 }
37723 }
37724}
37725
37726#[cfg(test)]
37727mod tests {
37728 use super::*;
37729
#[test]
fn test_dialect_type_from_str() {
    // Each dialect name (including the `postgresql` alias) must parse into
    // the matching `DialectType` variant.
    let cases = [
        ("postgres", DialectType::PostgreSQL),
        ("postgresql", DialectType::PostgreSQL),
        ("mysql", DialectType::MySQL),
        ("bigquery", DialectType::BigQuery),
    ];
    for (name, expected) in cases {
        assert_eq!(name.parse::<DialectType>().unwrap(), expected);
    }
}
37746
#[test]
fn test_basic_transpile() {
    // A trivial statement goes Generic -> PostgreSQL as exactly one
    // statement, textually unchanged.
    let source = Dialect::get(DialectType::Generic);
    let statements = source
        .transpile("SELECT 1", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(statements.len(), 1);
    assert_eq!(statements[0], "SELECT 1");
}
37756
#[test]
fn test_function_transformation_mysql() {
    // MySQL spells NVL as IFNULL.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT NVL(a, b)";
    let expected = "SELECT IFNULL(a, b)";
    let statements = source.transpile(sql, DialectType::MySQL).unwrap();
    assert_eq!(statements[0], expected);
}
37766
#[test]
fn test_get_path_duckdb() {
    // Smoke test: each transpile must succeed; the output is only printed
    // for manual inspection.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let colon_sql = "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit";
    let get_path_sql = "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')";

    // Step 1: colon path access round-tripped through Snowflake itself.
    let identity = snowflake
        .transpile(colon_sql, DialectType::Snowflake)
        .unwrap();
    eprintln!("Snowflake->Snowflake colon: {}", identity[0]);

    // Step 2: the same colon path access targeting DuckDB.
    let colon_duckdb = snowflake.transpile(colon_sql, DialectType::DuckDB).unwrap();
    eprintln!("Snowflake->DuckDB colon: {}", colon_duckdb[0]);

    // Step 3: the explicit GET_PATH call targeting DuckDB.
    let explicit_duckdb = snowflake
        .transpile(get_path_sql, DialectType::DuckDB)
        .unwrap();
    eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", explicit_duckdb[0]);
}
37799
#[test]
fn test_function_transformation_postgres() {
    let source = Dialect::get(DialectType::Generic);
    let expected = "SELECT COALESCE(a, b)";

    // PostgreSQL has no IFNULL; it becomes COALESCE.
    let from_ifnull = source
        .transpile("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(from_ifnull[0], expected);

    // NVL maps to COALESCE as well.
    let from_nvl = source
        .transpile("SELECT NVL(a, b)", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(from_nvl[0], expected);
}
37815
#[test]
fn test_hive_cast_to_trycast() {
    // A Hive CAST turns into TRY_CAST on targets that support it.
    let hive = Dialect::get(DialectType::Hive);
    let sql = "CAST(1 AS INT)";

    let duckdb = hive.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(duckdb[0], "TRY_CAST(1 AS INT)");

    let presto = hive.transpile(sql, DialectType::Presto).unwrap();
    assert_eq!(presto[0], "TRY_CAST(1 AS INTEGER)");
}
37830
#[test]
fn test_hive_array_identity() {
    // Hive must keep the angle-bracket form of ARRAY<DATE> on both code paths.
    let hive = Dialect::get(DialectType::Hive);
    let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";

    // Path 1: the one-shot transpile entry point.
    let transpiled = hive.transpile(sql, DialectType::Hive).unwrap();
    eprintln!("Hive ARRAY via transpile: {}", transpiled[0]);
    assert!(
        transpiled[0].contains("ARRAY<DATE>"),
        "transpile: Expected ARRAY<DATE>, got: {}",
        transpiled[0]
    );

    // Path 2: parse -> transform -> generate (the identity test path).
    let statements = hive.parse(sql).unwrap();
    let rewritten = hive.transform(statements[0].clone()).unwrap();
    let regenerated = hive.generate(&rewritten).unwrap();
    eprintln!("Hive ARRAY via identity path: {}", regenerated);
    assert!(
        regenerated.contains("ARRAY<DATE>"),
        "identity path: Expected ARRAY<DATE>, got: {}",
        regenerated
    );
}
37857
#[test]
fn test_starrocks_delete_between_expansion() {
    // StarRocks rejects BETWEEN inside DELETE, so it has to be expanded there.
    let source = Dialect::get(DialectType::Generic);

    // DELETE + BETWEEN -> `>= AND <=`.
    let delete_between = source
        .transpile(
            "DELETE FROM t WHERE a BETWEEN b AND c",
            DialectType::StarRocks,
        )
        .unwrap();
    assert_eq!(delete_between[0], "DELETE FROM t WHERE a >= b AND a <= c");

    // DELETE + NOT BETWEEN -> `< OR >`.
    let delete_not_between = source
        .transpile(
            "DELETE FROM t WHERE a NOT BETWEEN b AND c",
            DialectType::StarRocks,
        )
        .unwrap();
    assert_eq!(delete_not_between[0], "DELETE FROM t WHERE a < b OR a > c");

    // SELECT keeps BETWEEN, since StarRocks supports it there.
    let select_between = source
        .transpile(
            "SELECT * FROM t WHERE a BETWEEN b AND c",
            DialectType::StarRocks,
        )
        .unwrap();
    assert!(
        select_between[0].contains("BETWEEN"),
        "BETWEEN should be preserved in SELECT"
    );
}
37893
#[test]
fn test_snowflake_ltrim_rtrim_parse() {
    // Nested LTRIM(RTRIM(..)) must transpile from Snowflake without error.
    let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let outcome = snowflake.transpile(sql, DialectType::DuckDB);
    match &outcome {
        Err(err) => eprintln!("LTRIM/RTRIM error: {}", err),
        Ok(rendered) => eprintln!("LTRIM/RTRIM result: {}", rendered[0]),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
        outcome.err()
    );
}
37909
#[test]
fn test_duckdb_count_if_parse() {
    // COUNT_IF must round-trip through DuckDB without error.
    let sql = "COUNT_IF(x)";
    let duckdb = Dialect::get(DialectType::DuckDB);
    let outcome = duckdb.transpile(sql, DialectType::DuckDB);
    match &outcome {
        Err(err) => eprintln!("COUNT_IF error: {}", err),
        Ok(rendered) => eprintln!("COUNT_IF result: {}", rendered[0]),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful parse of COUNT_IF(x), got error: {:?}",
        outcome.err()
    );
}
37925
#[test]
fn test_tsql_cast_tinyint_parse() {
    // A TSQL cast to TINYINT must transpile to DuckDB without error.
    let sql = "CAST(X AS TINYINT)";
    let tsql = Dialect::get(DialectType::TSQL);
    let outcome = tsql.transpile(sql, DialectType::DuckDB);
    match &outcome {
        Err(err) => eprintln!("TSQL CAST TINYINT error: {}", err),
        Ok(rendered) => eprintln!("TSQL CAST TINYINT result: {}", rendered[0]),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful transpile, got error: {:?}",
        outcome.err()
    );
}
37941
#[test]
fn test_pg_hash_bitwise_xor() {
    // The PostgreSQL `#` operator must survive a PostgreSQL round trip.
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let sql = "x # y";
    let statements = postgres.transpile(sql, DialectType::PostgreSQL).unwrap();
    assert_eq!(statements[0], "x # y");
}
37948
#[test]
fn test_pg_array_to_duckdb() {
    // PostgreSQL ARRAY[...] literals become DuckDB bracket lists; `@>` is kept.
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let sql = "SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]";
    let expected = "SELECT [1, 2, 3] @> [1, 2]";
    let statements = postgres.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
37957
#[test]
fn test_array_remove_bigquery() {
    // For BigQuery, ARRAY_REMOVE is rewritten as a filtered UNNEST subquery.
    let source = Dialect::get(DialectType::Generic);
    let sql = "ARRAY_REMOVE(the_array, target)";
    let expected = "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)";
    let statements = source.transpile(sql, DialectType::BigQuery).unwrap();
    assert_eq!(statements[0], expected);
}
37969
#[test]
fn test_map_clickhouse_case() {
    // Smoke test: parsing and transpiling a MAP cast must both succeed; the
    // results are only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))";

    let ast = source.parse(sql).unwrap();
    eprintln!("MAP parsed: {:?}", ast);

    let statements = source.transpile(sql, DialectType::ClickHouse).unwrap();
    eprintln!("MAP result: {}", statements[0]);
}
37985
#[test]
fn test_generate_date_array_presto() {
    // For Presto, GENERATE_DATE_ARRAY becomes SEQUENCE with a day-based step.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let expected = "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))";
    let statements = source.transpile(sql, DialectType::Presto).unwrap();
    eprintln!("GDA -> Presto: {}", statements[0]);
    assert_eq!(statements[0], expected);
}
37996
#[test]
fn test_generate_date_array_postgres() {
    // Smoke test: the transpile must succeed; the output is only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let statements = source.transpile(sql, DialectType::PostgreSQL).unwrap();
    eprintln!("GDA -> PostgreSQL: {}", statements[0]);
}
38006
#[test]
fn test_generate_date_array_snowflake() {
    // Smoke test: the transpile must succeed; the output is only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let statements = source.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("GDA -> Snowflake: {}", statements[0]);
}
38018
#[test]
fn test_array_length_generate_date_array_snowflake() {
    // Smoke test: the transpile must succeed; the output is only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let statements = source.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", statements[0]);
}
38028
#[test]
fn test_generate_date_array_mysql() {
    // Smoke test: the transpile must succeed; the output is only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let statements = source.transpile(sql, DialectType::MySQL).unwrap();
    eprintln!("GDA -> MySQL: {}", statements[0]);
}
38038
#[test]
fn test_generate_date_array_redshift() {
    // Smoke test: the transpile must succeed; the output is only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let statements = source.transpile(sql, DialectType::Redshift).unwrap();
    eprintln!("GDA -> Redshift: {}", statements[0]);
}
38048
#[test]
fn test_generate_date_array_tsql() {
    // Smoke test: the transpile must succeed; the output is only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let statements = source.transpile(sql, DialectType::TSQL).unwrap();
    eprintln!("GDA -> TSQL: {}", statements[0]);
}
38058
#[test]
fn test_struct_colon_syntax() {
    // Diagnostic test: prints the outcome for both STRUCT field spellings
    // and never fails on a transpile error.
    let source = Dialect::get(DialectType::Generic);

    // Fields written without a colon.
    let plain_sql = "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)";
    match source.transpile(plain_sql, DialectType::ClickHouse) {
        Err(err) => eprintln!("STRUCT no colon error: {}", err),
        Ok(rendered) => eprintln!("STRUCT no colon -> ClickHouse: {}", rendered[0]),
    }

    // Fields written as `name: TYPE`.
    let colon_sql = "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)";
    match source.transpile(colon_sql, DialectType::ClickHouse) {
        Err(err) => eprintln!("STRUCT colon error: {}", err),
        Ok(rendered) => eprintln!("STRUCT colon -> ClickHouse: {}", rendered[0]),
    }
}
38081
#[test]
fn test_generate_date_array_cte_wrapped_mysql() {
    // Smoke test: GENERATE_DATE_ARRAY inside a CTE must transpile; the output
    // is only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let statements = source.transpile(sql, DialectType::MySQL).unwrap();
    eprintln!("GDA CTE -> MySQL: {}", statements[0]);
}
38091
#[test]
fn test_generate_date_array_cte_wrapped_tsql() {
    // Smoke test: GENERATE_DATE_ARRAY inside a CTE must transpile; the output
    // is only printed.
    let source = Dialect::get(DialectType::Generic);
    let sql = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let statements = source.transpile(sql, DialectType::TSQL).unwrap();
    eprintln!("GDA CTE -> TSQL: {}", statements[0]);
}
38101
#[test]
fn test_decode_literal_no_null_check() {
    // An all-literal Oracle DECODE compiles to plain equality: no IS NULL.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = "SELECT decode(1,2,3,4)";
    let expected = "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END";
    let statements = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(
        statements[0], expected,
        "Literal DECODE should not have IS NULL checks"
    );
}
38114
#[test]
fn test_decode_column_vs_literal_no_null_check() {
    // Column-vs-literal DECODE also uses simple equality (like sqlglot).
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = "SELECT decode(col, 2, 3, 4) FROM t";
    let expected = "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t";
    let statements = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(
        statements[0], expected,
        "Column vs literal DECODE should not have IS NULL checks"
    );
}
38127
#[test]
fn test_decode_column_vs_column_keeps_null_check() {
    // Column-vs-column DECODE keeps the null-safe comparison.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = "SELECT decode(col, col2, 3, 4) FROM t";
    let statements = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert!(
        statements[0].contains("IS NULL"),
        "Column vs column DECODE should have IS NULL checks, got: {}",
        statements[0]
    );
}
38141
#[test]
fn test_decode_null_search() {
    // A NULL search value in Oracle DECODE is matched with IS NULL.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = "SELECT decode(col, NULL, 3, 4) FROM t";
    let expected = "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t";
    let statements = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38154
38155 // =========================================================================
38156 // REGEXP function transpilation tests
38157 // =========================================================================
38158
#[test]
fn test_regexp_substr_snowflake_to_duckdb_2arg() {
    // Two-argument REGEXP_SUBSTR maps straight onto REGEXP_EXTRACT.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern')";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38167
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
    // A start position of 1 is dropped from the output.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1)";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38176
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
    // A start position above 1 is emulated with SUBSTRING wrapped in NULLIF.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38188
#[test]
fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
    // An occurrence above 1 indexes into the list of all matches.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)";
    let expected = "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38203
#[test]
fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
    // Position 1, occurrence 1 and the 'e' flag all drop out of the output.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38215
#[test]
fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
    // An explicit group 0 drops out of the output too.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38227
#[test]
fn test_regexp_substr_snowflake_identity_strip_group0() {
    // Snowflake -> Snowflake strips the trailing group-0 argument.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";
    let statements = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
38239
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
    // Two-argument REGEXP_SUBSTR_ALL maps straight onto REGEXP_EXTRACT_ALL.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38251
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
    // A start position above 1 is emulated by slicing the subject first.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38266
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
    // Position 1, occurrence 1 and the 'e' flag all drop out of the output.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38278
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
    // An explicit group 0 drops out of the output too.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38290
#[test]
fn test_regexp_substr_all_snowflake_identity_strip_group0() {
    // Snowflake -> Snowflake strips the trailing group-0 argument.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let statements = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
38305
#[test]
fn test_regexp_count_snowflake_to_duckdb_2arg() {
    // REGEXP_COUNT becomes the length of all matches, guarded against an
    // empty pattern.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_COUNT(s, 'pattern')";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38317
#[test]
fn test_regexp_count_snowflake_to_duckdb_3arg() {
    // A start position is emulated by slicing the subject with SUBSTRING.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_COUNT(s, 'pattern', 3)";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38329
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
    // Flags are folded into the pattern as an inline `(?i)` prefix.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')";
    let expected = "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38344
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
    // Same rewrite with a literal subject and a multi-character flag string.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')";
    let expected = "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38359
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
    // Position 1 / occurrence 1 reduces to the plain three-argument form.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)";
    let expected = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38371
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
    // Position above 1 with occurrence 0: keep the prefix, then replace
    // globally ('g') in the remainder.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38386
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
    // Position above 1 with occurrence 1: keep the prefix, then replace
    // without the 'g' flag in the remainder.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38401
#[test]
fn test_rlike_snowflake_to_duckdb_2arg() {
    // The Snowflake RLIKE function maps to REGEXP_FULL_MATCH in DuckDB.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT RLIKE(a, b)";
    let expected = "SELECT REGEXP_FULL_MATCH(a, b)";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38410
#[test]
fn test_rlike_snowflake_to_duckdb_3arg_flags() {
    // The flags argument is passed through unchanged.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT RLIKE(a, b, 'i')";
    let expected = "SELECT REGEXP_FULL_MATCH(a, b, 'i')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
38419
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
    // Without a capture group the call is a plain rename.
    let bigquery = Dialect::get(DialectType::BigQuery);
    let sql = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";
    let statements = bigquery.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
38431
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
    // With a capture group, the Snowflake call gets explicit position,
    // occurrence, flag and group arguments.
    let bigquery = Dialect::get(DialectType::BigQuery);
    let sql = "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)";
    let statements = bigquery.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
38446
#[test]
fn test_regexp_instr_snowflake_to_duckdb_2arg() {
    // Only the structural markers of the DuckDB emulation are checked.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_INSTR(s, 'pattern')";
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    assert!(
        rendered.contains("CASE WHEN"),
        "Expected CASE WHEN in result: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_SUM"),
        "Expected LIST_SUM in result: {}",
        rendered
    );
}
38464
#[test]
fn test_array_except_generic_to_duckdb() {
    // Only the building blocks of the DuckDB ARRAY_EXCEPT emulation are
    // checked, not its full text.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let statements = source.transpile(sql, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", rendered);
    assert!(
        rendered.contains("CASE WHEN"),
        "Expected CASE WHEN: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_FILTER"),
        "Expected LIST_FILTER: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_DISTINCT"),
        "Expected LIST_DISTINCT: {}",
        rendered
    );
    assert!(
        rendered.contains("IS NOT DISTINCT FROM"),
        "Expected IS NOT DISTINCT FROM: {}",
        rendered
    );
    assert!(
        rendered.contains("= 0"),
        "Expected = 0 filter: {}",
        rendered
    );
}
38501
#[test]
fn test_array_except_generic_to_snowflake() {
    // Snowflake keeps ARRAY_EXCEPT; only the array literals change to brackets.
    let source = Dialect::get(DialectType::Generic);
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let expected = "SELECT ARRAY_EXCEPT([1, 2, 3], [2])";
    let statements = source.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", statements[0]);
    assert_eq!(statements[0], expected);
}
38514
/// Transpiles `ARRAY_EXCEPT` from the generic dialect to Presto and pins the
/// exact output: the function name is kept and the `ARRAY(...)` constructors
/// are rendered as `ARRAY[...]` literals.
#[test]
fn test_array_except_generic_to_presto() {
    let dialect = Dialect::get(DialectType::Generic);
    let result = dialect
        .transpile(
            "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
            DialectType::Presto,
        )
        .unwrap();
    // `assert_eq!` prints both the actual and the expected SQL on mismatch.
    assert_eq!(result[0], "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
}
38527
/// Transpiles a Snowflake `ARRAY_EXCEPT` call over bracketed array literals to
/// DuckDB and checks that the generated SQL contains a `CASE WHEN` expression
/// and a `LIST_TRANSFORM` call. The exact output text is not pinned.
#[test]
fn test_array_except_snowflake_to_duckdb() {
    let dialect = Dialect::get(DialectType::Snowflake);
    let result = dialect
        .transpile("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
        .unwrap();
    // Only the first emitted statement is inspected.
    let sql = &result[0];

    // Every failure message carries the full generated SQL, so a failing run is
    // self-describing without any extra diagnostic output.
    for fragment in ["CASE WHEN", "LIST_TRANSFORM"] {
        assert!(sql.contains(fragment), "Expected {}: {}", fragment, sql);
    }
}
38546
/// Transpiles a Snowflake `ARRAY_CONTAINS` call back to Snowflake and checks
/// that the statement comes out textually unchanged, including the `NULL`
/// element inside the array literal.
#[test]
fn test_array_contains_snowflake_to_snowflake() {
    let dialect = Dialect::get(DialectType::Snowflake);
    let result = dialect
        .transpile(
            "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
            DialectType::Snowflake,
        )
        .unwrap();
    // `assert_eq!` prints both the actual and the expected SQL on mismatch.
    assert_eq!(result[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
}
38559
/// Transpiles a Snowflake `ARRAY_CONTAINS` call to DuckDB and checks that the
/// generated SQL contains `CASE WHEN`, `NULLIF` and `ARRAY_CONTAINS`. The exact
/// output text is not pinned.
#[test]
fn test_array_contains_snowflake_to_duckdb() {
    let dialect = Dialect::get(DialectType::Snowflake);
    let result = dialect
        .transpile(
            "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
            DialectType::DuckDB,
        )
        .unwrap();
    // Only the first emitted statement is inspected.
    let sql = &result[0];

    // Every failure message carries the full generated SQL, so a failing run is
    // self-describing without any extra diagnostic output.
    for fragment in ["CASE WHEN", "NULLIF", "ARRAY_CONTAINS"] {
        assert!(sql.contains(fragment), "Expected {}: {}", fragment, sql);
    }
}
38586
/// Transpiles a Snowflake `ARRAY_DISTINCT` call to DuckDB and checks that the
/// generated SQL contains `CASE WHEN`, `LIST_DISTINCT`, `LIST_APPEND` and
/// `LIST_FILTER`. The exact output text is not pinned.
#[test]
fn test_array_distinct_snowflake_to_duckdb() {
    let dialect = Dialect::get(DialectType::Snowflake);
    let result = dialect
        .transpile(
            "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])",
            DialectType::DuckDB,
        )
        .unwrap();
    // Only the first emitted statement is inspected.
    let sql = &result[0];

    // Every failure message carries the full generated SQL, so a failing run is
    // self-describing without any extra diagnostic output.
    for fragment in ["CASE WHEN", "LIST_DISTINCT", "LIST_APPEND", "LIST_FILTER"] {
        assert!(sql.contains(fragment), "Expected {}: {}", fragment, sql);
    }
}
38618}